media-libs/x264/files/.svn/text-base/x264-psyrdo-0.6.diff.svn-base

   1 diff --git a/common/common.c b/common/common.c\r
   2 index 71a29b1..301b9ed 100644\r
   3 --- a/common/common.c\r
   4 +++ b/common/common.c\r
   5 @@ -117,6 +117,7 @@ void    x264_param_default( x264_param_t *param )\r
   6                           | X264_ANALYSE_PSUB16x16 | X264_ANALYSE_BSUB16x16;\r
   7      param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL;\r
   8      param->analyse.i_me_method = X264_ME_HEX;\r
   9 +    param->analyse.f_psy_rd = 1.0;\r
  10      param->analyse.i_me_range = 16;\r
  11      param->analyse.i_subpel_refine = 5;\r
  12      param->analyse.b_chroma_me = 1;\r
  13 @@ -464,6 +465,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )\r
  14          p->analyse.i_mv_range_thread = atoi(value);\r
  15      OPT2("subme", "subq")\r
  16          p->analyse.i_subpel_refine = atoi(value);\r
  17 +    OPT("psy-rd")\r
  18 +        p->analyse.f_psy_rd = atof(value);\r
  19      OPT("bime")\r
  20          p->analyse.b_bidir_me = atobool(value);\r
  21      OPT("chroma-me")\r
  22 @@ -856,6 +859,7 @@ char *x264_param2string( x264_param_t *p, int b_res )\r
  23      s += sprintf( s, " analyse=%#x:%#x", p->analyse.intra, p->analyse.inter );\r
  24      s += sprintf( s, " me=%s", x264_motion_est_names[ p->analyse.i_me_method ] );\r
  25      s += sprintf( s, " subme=%d", p->analyse.i_subpel_refine );\r
  26 +    s += sprintf( s, " psy_rd=%f", p->analyse.f_psy_rd );\r
  27      s += sprintf( s, " brdo=%d", p->analyse.b_bframe_rdo );\r
  28      s += sprintf( s, " mixed_ref=%d", p->analyse.b_mixed_references );\r
  29      s += sprintf( s, " me_range=%d", p->analyse.i_me_range );\r
  30 diff --git a/common/common.h b/common/common.h\r
  31 index e2792cc..fbd88fd 100644\r
  32 --- a/common/common.h\r
  33 +++ b/common/common.h\r
  34 @@ -454,6 +454,12 @@ struct x264_t\r
  35              DECLARE_ALIGNED_16( int16_t i8x8_dct_buf[3][64] );\r
  36              DECLARE_ALIGNED_16( int16_t i4x4_dct_buf[15][16] );\r
  37  \r
  38 +            /* SATD scores for psy RD */\r
  39 +            int fenc_satd[4][4];\r
  40 +            int fenc_satd_sum;\r
  41 +            int fenc_sa8d[2][2];\r
  42 +            int fenc_sa8d_sum;\r
  43 +\r
  44              /* pointer over mb of the frame to be compressed */\r
  45              uint8_t *p_fenc[3];\r
  46  \r
  47 diff --git a/encoder/analyse.c b/encoder/analyse.c\r
  48 index 270b90a..25346b4 100644\r
  49 --- a/encoder/analyse.c\r
  50 +++ b/encoder/analyse.c\r
  51 @@ -467,6 +467,34 @@ static void predict_4x4_mode_available( unsigned int i_neighbour,\r
  52      }\r
  53  }\r
  54  \r
  55 +/* Pre-calculate fenc satd scores for psy RD, minus DC coefficients: one SATD per 4x4 block and one SA8D per 8x8 block of plane 0, plus their totals, all stored in h->mb.pic. */\r
  56 +static inline void x264_mb_cache_fenc_satd( x264_t *h )\r
  57 +{\r
  58 +    DECLARE_ALIGNED_16(uint8_t zero[16]) = {0}; /* all-zero row; passed with stride 0 so every row of the "reference" block is zero */\r
  59 +    uint8_t *fenc;\r
  60 +    int x, y, satd_sum = 0, sa8d_sum = 0;\r
  61 +    if( !h->param.analyse.i_psy_rd) /* i_psy_rd == 0: nothing is cached */\r
  62 +        return;\r
  63 +    for( y = 0; y < 4; y++ ) /* 4x4 grid of 4x4 blocks */\r
  64 +        for( x = 0; x < 4; x++ )\r
  65 +        {\r
  66 +            fenc = h->mb.pic.p_fenc[0]+x*4+y*4*FENC_STRIDE; /* x, y are block indices; convert to a pixel offset */\r
  67 +            h->mb.pic.fenc_satd[y][x] = h->pixf.satd[PIXEL_4x4]( zero, 0, fenc, FENC_STRIDE )\r
  68 +                                      - (h->pixf.sad[PIXEL_4x4]( zero, 0, fenc, FENC_STRIDE )>>1); /* DC term: SAD against zero, halved */\r
  69 +            satd_sum += h->mb.pic.fenc_satd[y][x];\r
  70 +        }\r
  71 +    for( y = 0; y < 2; y++ ) /* 2x2 grid of 8x8 blocks */\r
  72 +        for( x = 0; x < 2; x++ )\r
  73 +        {\r
  74 +            fenc = h->mb.pic.p_fenc[0]+x*8+y*8*FENC_STRIDE;\r
  75 +            h->mb.pic.fenc_sa8d[y][x] = h->pixf.sa8d[PIXEL_8x8]( zero, 0, fenc, FENC_STRIDE )\r
  76 +                                      - (h->pixf.sad[PIXEL_8x8]( zero, 0, fenc, FENC_STRIDE )>>2); /* DC term for SA8D uses >>2 instead of >>1 */\r
  77 +            sa8d_sum += h->mb.pic.fenc_sa8d[y][x];\r
  78 +        }\r
  79 +    h->mb.pic.fenc_satd_sum = satd_sum; /* totals handed back by sum_satd()/sum_sa8d() for PIXEL_16x16 */\r
  80 +    h->mb.pic.fenc_sa8d_sum = sa8d_sum;\r
  81 +}\r
  82 +\r
  83  static void x264_mb_analyse_intra_chroma( x264_t *h, x264_mb_analysis_t *a )\r
  84  {\r
  85      int i;\r
  86 @@ -1016,12 +1044,15 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )\r
  87      assert( a->l0.me16x16.mv[1] <= h->mb.mv_max_spel[1] || h->param.i_threads == 1 );\r
  88  \r
  89      h->mb.i_type = P_L0;\r
  90 -    if( a->b_mbrd && a->l0.me16x16.i_ref == 0\r
  91 -        && *(uint32_t*)a->l0.me16x16.mv == *(uint32_t*)h->mb.cache.pskip_mv )\r
  92 +    if( a->b_mbrd )\r
  93      {\r
  94 -        h->mb.i_partition = D_16x16;\r
  95 -        x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv );\r
  96 -        a->l0.i_rd16x16 = x264_rd_cost_mb( h, a->i_lambda2 );\r
  97 +        x264_mb_cache_fenc_satd( h );\r
  98 +        if( a->l0.me16x16.i_ref == 0 && *(uint32_t*)a->l0.me16x16.mv == *(uint32_t*)h->mb.cache.pskip_mv )\r
  99 +        {\r
 100 +            h->mb.i_partition = D_16x16;\r
 101 +            x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv );\r
 102 +            a->l0.i_rd16x16 = x264_rd_cost_mb( h, a->i_lambda2 );\r
 103 +        }\r
 104      }\r
 105  }\r
 106  \r
 107 @@ -1906,7 +1937,7 @@ static void x264_mb_analyse_p_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd )\r
 108  \r
 109  static void x264_mb_analyse_b_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd_inter )\r
 110  {\r
 111 -    int thresh = i_satd_inter * 17/16;\r
 112 +    int thresh = i_satd_inter * (17 + (!!h->param.analyse.i_psy_rd))/16;\r
 113  \r
 114      if( a->b_direct_available && a->i_rd16x16direct == COST_MAX )\r
 115      {\r
 116 @@ -2067,7 +2098,10 @@ void x264_macroblock_analyse( x264_t *h )\r
 117      {\r
 118          x264_mb_analyse_intra( h, &analysis, COST_MAX );\r
 119          if( analysis.b_mbrd )\r
 120 +        {\r
 121 +            x264_mb_cache_fenc_satd( h );\r
 122              x264_intra_rd( h, &analysis, COST_MAX );\r
 123 +        }\r
 124  \r
 125          i_cost = analysis.i_satd_i16x16;\r
 126          h->mb.i_type = I_16x16;\r
 127 @@ -2342,6 +2376,9 @@ void x264_macroblock_analyse( x264_t *h )\r
 128      {\r
 129          int i_bskip_cost = COST_MAX;\r
 130          int b_skip = 0;\r
 131 +        \r
 132 +        if( analysis.b_mbrd )\r
 133 +            x264_mb_cache_fenc_satd( h );\r
 134  \r
 135          h->mb.i_type = B_SKIP;\r
 136          if( h->mb.b_direct_auto_write )\r
 137 diff --git a/encoder/encoder.c b/encoder/encoder.c\r
 138 index 2c2fe8c..ff5febe 100644\r
 139 --- a/encoder/encoder.c\r
 140 +++ b/encoder/encoder.c\r
 141 @@ -411,5 +411,6 @@ static int x264_validate_parameters( x264_t *h )\r
 142          h->param.analyse.b_fast_pskip = 0;\r
 143          h->param.analyse.i_noise_reduction = 0;\r
 144 +        h->param.analyse.f_psy_rd = 0;\r
 145      }\r
 146      if( h->param.rc.i_rc_method == X264_RC_CQP )\r
 147      {\r
 148 @@ -486,6 +487,16 @@ static int x264_validate_parameters( x264_t *h )\r
 149      if( !h->param.b_cabac )\r
 150          h->param.analyse.i_trellis = 0;\r
 151      h->param.analyse.i_trellis = x264_clip3( h->param.analyse.i_trellis, 0, 2 );\r
 152 +    h->param.analyse.f_psy_rd = x264_clip3f( h->param.analyse.f_psy_rd, 0, 10 );\r
 153 +    if( h->param.analyse.i_subpel_refine < 6)\r
 154 +        h->param.analyse.f_psy_rd = 0;\r
 155 +    if( h->param.analyse.f_psy_rd && h->param.analyse.i_trellis == 1 )\r
 156 +    {\r
 157 +        x264_log( h, X264_LOG_WARNING, "psy RD is not compatible with trellis=1; use 0 or 2.\n" );\r
 158 +        h->param.analyse.i_trellis = 0;\r
 159 +    }\r
 160 +    /* Always refresh the fixed-point copy: it is the value the encoder tests, so it must drop to 0 whenever f_psy_rd was forced to 0 above. */\r
 161 +    h->param.analyse.i_psy_rd = FIX8( h->param.analyse.f_psy_rd );\r
 162      h->param.rc.i_aq_mode = x264_clip3( h->param.rc.i_aq_mode, 0, 2 );\r
 163      if( h->param.rc.f_aq_strength <= 0 )\r
 164          h->param.rc.i_aq_mode = 0;\r
 165 diff --git a/encoder/rdo.c b/encoder/rdo.c\r
 166 index 76bf57b..7da862a 100644\r
 167 --- a/encoder/rdo.c\r
 168 +++ b/encoder/rdo.c\r
 169 @@ -50,21 +50,82 @@ static uint16_t cabac_prefix_size[15][128];\r
 170  \r
 171  #define COPY_CABAC h->mc.memcpy_aligned( &cabac_tmp.f8_bits_encoded, &h->cabac.f8_bits_encoded, \\r
 172          sizeof(x264_cabac_t) - offsetof(x264_cabac_t,f8_bits_encoded) )\r
 173 -    \r
 174 -static int ssd_mb( x264_t *h )\r
 175 +/* ADD_ABS_SATD: add to satd the absolute difference between fdec's score (satdtype against zero, minus dc_coef) and the cached fenc score from sum_<satdtype>().  Expects h, zero, fdec, dc_coef, x, y and satd in scope at the point of use. */\r
 176 +#define ADD_ABS_SATD(satdtype, pixel)\\r
 177 +    satd += abs((h->pixf.satdtype[pixel]( zero, 0, fdec, FDEC_STRIDE ) - dc_coef)\\r
 178 +          - sum_##satdtype( h, pixel, x, y ));\r
 179 +\r
 180 +/* Sum the cached SATDs to avoid repeating them.  x and y are pixel offsets inside the macroblock; pixel is a PIXEL_* size index. */\r
 181 +static inline int sum_satd( x264_t *h, int pixel, int x, int y )\r
 182 +{\r
 183 +    int satd = 0;\r
 184 +    int min_x = x>>2; /* pixel offset -> index into fenc_satd[4][4] */\r
 185 +    int min_y = y>>2;\r
 186 +    int max_x = (x>>2) + (x264_pixel_size[pixel].w>>2); /* one past the last 4x4 block covered by this partition size */\r
 187 +    int max_y = (y>>2) + (x264_pixel_size[pixel].h>>2);\r
 188 +    if( pixel == PIXEL_16x16 ) /* the full block is the precomputed total */\r
 189 +        return h->mb.pic.fenc_satd_sum;\r
 190 +    for( y = min_y; y < max_y; y++ ) /* x and y are reused as block indices from here on */\r
 191 +        for( x = min_x; x < max_x; x++ )\r
 192 +            satd += h->mb.pic.fenc_satd[y][x];\r
 193 +    return satd;\r
 194 +}\r
 195 +\r
 196 +static inline int sum_sa8d( x264_t *h, int pixel, int x, int y ) /* Same as sum_satd(), but over the cached 8x8 SA8D scores. */\r
 197 +{\r
 198 +    int sa8d = 0;\r
 199 +    int min_x = x>>3; /* pixel offset -> index into fenc_sa8d[2][2] */\r
 200 +    int min_y = y>>3;\r
 201 +    int max_x = (x>>3) + (x264_pixel_size[pixel].w>>3); /* a width or height under 8 gives an empty range, so the sum is 0 */\r
 202 +    int max_y = (y>>3) + (x264_pixel_size[pixel].h>>3);\r
 203 +    if( pixel == PIXEL_16x16 ) /* the full block is the precomputed total */\r
 204 +        return h->mb.pic.fenc_sa8d_sum;\r
 205 +    for( y = min_y; y < max_y; y++ ) /* x and y are reused as block indices from here on */\r
 206 +        for( x = min_x; x < max_x; x++ )\r
 207 +            sa8d += h->mb.pic.fenc_sa8d[y][x];\r
 208 +    return sa8d;\r
 209 +}\r
 210 +\r
 211 +/* Psy RD distortion metric: SSD plus "Absolute Difference of Complexities" */\r
 212 +/* SATD and SA8D are used to measure block complexity. */\r
 213 +/* Blocks with a complexity most similar to that of the source are scored best. */\r
 214 +/* The SATD and SA8D differences are both used, to avoid bias from the DCT size.  Using SATD */\r
 215 +/* only, for example, results in overuse of 8x8dct, while the opposite occurs when using SA8D. */\r
 216 +/* This is because frequencies stored in an 8x8dct sum up to a larger value when viewed through a 4x4 */\r
 217 +/* transform and vice versa with a 4x4dct and an 8x8 transform. */\r
 218 +/* SSD is still used as the primary RD metric; this value is merely added to it for psy purposes. */\r
 219 +\r
 220 +/* FIXME:  Is there a better metric than averaged SATD/SA8D difference for complexity difference? */\r
 221 +/* Hadamard transform is recursive, so a SATD+SA8D can be done faster by taking advantage of this fact. */\r
 222 +/* This optimization can also be used in non-RD transform decision. */\r
 223 +\r
 224 +static inline int ssd_plane( x264_t *h, int size, int p, int x, int y ) /* SSD between fenc and fdec for a size block at (x,y) of plane p; plane 0 additionally gets the psy RD term when i_psy_rd is nonzero. */\r
 225  {\r
 226 -    return h->pixf.ssd[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE,\r
 227 -                                     h->mb.pic.p_fdec[0], FDEC_STRIDE )\r
 228 -         + h->pixf.ssd[PIXEL_8x8](   h->mb.pic.p_fenc[1], FENC_STRIDE,\r
 229 -                                     h->mb.pic.p_fdec[1], FDEC_STRIDE )\r
 230 -         + h->pixf.ssd[PIXEL_8x8](   h->mb.pic.p_fenc[2], FENC_STRIDE,\r
 231 -                                     h->mb.pic.p_fdec[2], FDEC_STRIDE );\r
 232 +    DECLARE_ALIGNED_16(uint8_t zero[16]) = {0}; /* all-zero row; passed with stride 0 as the "reference" block */\r
 233 +    int satd = 0; /* psy term; stays 0 for p != 0 or when psy RD is off */\r
 234 +    uint8_t *fdec = h->mb.pic.p_fdec[p] + x + y*FDEC_STRIDE;\r
 235 +    uint8_t *fenc = h->mb.pic.p_fenc[p] + x + y*FENC_STRIDE;\r
 236 +    if( p == 0 && h->param.analyse.i_psy_rd )\r
 237 +    {\r
 238 +        int dc_coef = h->pixf.sad[size](zero, 0, fdec, FDEC_STRIDE) >> 1; /* DC term of fdec, mirroring what x264_mb_cache_fenc_satd() subtracts for fenc */\r
 239 +        ADD_ABS_SATD(satd, size); /* |fdec SATD - cached fenc SATD| */\r
 240 +        /* If the plane is smaller than 8x8, we can't do an SA8D; this probably isn't a big problem. */\r
 241 +        if(size <= PIXEL_8x8) /* NOTE(review): relies on PIXEL_* indices growing as blocks shrink -- confirm against the enum */\r
 242 +        {\r
 243 +            dc_coef >>= 1; /* SA8D DC term is sad>>2 */\r
 244 +            ADD_ABS_SATD(sa8d, size);\r
 245 +            satd >>= 1; /* average of the SATD and SA8D differences */\r
 246 +        }\r
 247 +        satd = (satd * h->param.analyse.i_psy_rd * x264_lambda_tab[h->mb.i_qp] + 128) >> 8; /* weight by i_psy_rd (set via FIX8, hence >>8 with +128 rounding) and by the lambda table entry for the current QP */\r
 248 +    }\r
 249 +    return h->pixf.ssd[size](fenc, FENC_STRIDE, fdec, FDEC_STRIDE) + satd;\r
 250  }\r
 251  \r
 252 -static int ssd_plane( x264_t *h, int size, int p, int x, int y )\r
 253 +static inline int ssd_mb( x264_t *h ) /* Distortion of the whole macroblock: one 16x16 block of plane 0 plus one 8x8 block each of planes 1 and 2. */\r
 254  {\r
 255 -    return h->pixf.ssd[size]( h->mb.pic.p_fenc[p] + x+y*FENC_STRIDE, FENC_STRIDE,\r
 256 -                              h->mb.pic.p_fdec[p] + x+y*FDEC_STRIDE, FDEC_STRIDE );\r
 257 +    return ssd_plane(h, PIXEL_16x16, 0, 0, 0) /* plane 0 may include the psy RD term (see ssd_plane) */\r
 258 +         + ssd_plane(h, PIXEL_8x8,   1, 0, 0)\r
 259 +         + ssd_plane(h, PIXEL_8x8,   2, 0, 0);\r
 260  }\r
 261  \r
 262  static int x264_rd_cost_mb( x264_t *h, int i_lambda2 )\r
 263 diff --git a/x264.c b/x264.c\r
 264 index 14466e5..96326cd 100644\r
 265 --- a/x264.c\r
 266 +++ b/x264.c\r
 267 @@ -243,6 +243,9 @@ static void Help( x264_param_t *defaults, int b_longhelp )\r
 268      H0( "  -m, --subme <integer>       Subpixel motion estimation and partition\n"\r
 269          "                                  decision quality: 1=fast, 7=best. [%d]\n", defaults->analyse.i_subpel_refine );\r
 270      H0( "      --b-rdo                 RD based mode decision for B-frames. Requires subme 6.\n" );\r
 271 +    H0( "      --psy-rd                Strength of mode decision psychovisual optimization [\"%f\"]\n"\r
 272 +        "                              Does nothing at subme < 6.",\r
 273 +                                       defaults->analyse.f_psy_rd );\r
 274      H0( "      --mixed-refs            Decide references on a per partition basis\n" );\r
 275      H1( "      --no-chroma-me          Ignore chroma in motion estimation\n" );\r
 276      H1( "      --bime                  Jointly optimize both MVs in B-frames\n" );\r
 277 @@ -411,6 +414,7 @@ static int  Parse( int argc, char **argv,\r
 278              { "mvrange", required_argument, NULL, 0 },\r
 279              { "mvrange-thread", required_argument, NULL, 0 },\r
 280              { "subme",   required_argument, NULL, 'm' },\r
 281 +            { "psy-rd",   required_argument, NULL, 0 },\r
 282              { "b-rdo",   no_argument,       NULL, 0 },\r
 283              { "mixed-refs", no_argument,    NULL, 0 },\r
 284              { "no-chroma-me", no_argument,  NULL, 0 },\r
 285 diff --git a/x264.h b/x264.h\r
 286 index 3b678dc..02266c1 100644\r
 287 --- a/x264.h\r
 288 +++ b/x264.h\r
 289 @@ -239,6 +239,8 @@ typedef struct x264_param_t\r
 290          int          b_fast_pskip; /* early SKIP detection on P-frames */\r
 291          int          b_dct_decimate; /* transform coefficient thresholding on P-frames */\r
 292          int          i_noise_reduction; /* adaptive pseudo-deadzone */\r
 293 +        float        f_psy_rd; /* Psy RD strength */\r
 294 +        int          i_psy_rd; /* Psy RD strength--fixed point value*/\r
 295  \r
 296          /* the deadzone size that will be used in luma quantization */\r
 297          int          i_luma_deadzone[2]; /* {inter, intra} */\r