1 diff --git a/common/common.c b/common/common.c
\r
2 index 71a29b1..21155eb 100644
\r
3 --- a/common/common.c
\r
4 +++ b/common/common.c
\r
5 @@ -117,6 +117,7 @@ void x264_param_default( x264_param_t *param )
\r
6 | X264_ANALYSE_PSUB16x16 | X264_ANALYSE_BSUB16x16;
\r
7 param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL;
\r
8 param->analyse.i_me_method = X264_ME_HEX;
\r
9 + param->analyse.f_psy_rd = 1.0;
\r
10 param->analyse.i_me_range = 16;
\r
11 param->analyse.i_subpel_refine = 5;
\r
12 param->analyse.b_chroma_me = 1;
\r
13 @@ -464,6 +465,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
\r
14 p->analyse.i_mv_range_thread = atoi(value);
\r
15 OPT2("subme", "subq")
\r
16 p->analyse.i_subpel_refine = atoi(value);
\r
18 + p->analyse.f_psy_rd = atof(value);
\r
20 p->analyse.b_bidir_me = atobool(value);
\r
22 @@ -856,6 +859,7 @@ char *x264_param2string( x264_param_t *p, int b_res )
\r
23 s += sprintf( s, " analyse=%#x:%#x", p->analyse.intra, p->analyse.inter );
\r
24 s += sprintf( s, " me=%s", x264_motion_est_names[ p->analyse.i_me_method ] );
\r
25 s += sprintf( s, " subme=%d", p->analyse.i_subpel_refine );
\r
26 + s += sprintf( s, " psy_rd=%.1f", p->analyse.f_psy_rd );
\r
27 s += sprintf( s, " brdo=%d", p->analyse.b_bframe_rdo );
\r
28 s += sprintf( s, " mixed_ref=%d", p->analyse.b_mixed_references );
\r
29 s += sprintf( s, " me_range=%d", p->analyse.i_me_range );
\r
30 diff --git a/common/common.h b/common/common.h
\r
31 index 80648a8..78cdb05 100644
\r
32 --- a/common/common.h
\r
33 +++ b/common/common.h
\r
34 @@ -342,8 +342,6 @@ struct x264_t
\r
35 x264_frame_t *fref1[16+3]; /* ref list 1 */
\r
36 int b_ref_reorder[2];
\r
40 /* Current MB DCT coeffs */
\r
43 @@ -454,6 +452,16 @@ struct x264_t
\r
44 DECLARE_ALIGNED_16( int16_t i8x8_dct_buf[3][64] );
\r
45 DECLARE_ALIGNED_16( int16_t i4x4_dct_buf[15][16] );
\r
47 + /* Psy trellis DCT data */
\r
48 + DECLARE_ALIGNED_16( int16_t fenc_dct8[4][64] );
\r
49 + DECLARE_ALIGNED_16( int16_t fenc_dct4[16][16] );
\r
51 + /* SATD scores for psy RD */
\r
52 + int fenc_satd[4][4];
\r
53 + int fenc_satd_sum;
\r
54 + int fenc_sa8d[2][2];
\r
55 + int fenc_sa8d_sum;
\r
57 /* pointer over mb of the frame to be compressed */
\r
60 diff --git a/common/dct.h b/common/dct.h
\r
61 index 1078023..daa96f4 100644
\r
64 @@ -41,6 +41,17 @@ static const uint16_t x264_dct8_weight_tab[64] = {
\r
68 +#define W(i) (i==0 ? FIX8(1.76777) :\
\r
69 + i==1 ? FIX8(1.11803) :\
\r
70 + i==2 ? FIX8(0.70711) :0)
\r
71 +static const uint16_t x264_dct4_weight_tab[16] = {
\r
72 + W(0), W(1), W(0), W(1),
\r
73 + W(1), W(2), W(1), W(2),
\r
74 + W(0), W(1), W(0), W(1),
\r
75 + W(1), W(2), W(1), W(2)
\r
79 /* inverse squared */
\r
80 #define W(i) (i==0 ? FIX8(3.125) :\
\r
81 i==1 ? FIX8(1.25) :\
\r
82 diff --git a/encoder/analyse.c b/encoder/analyse.c
\r
83 index 270b90a..59cc89b 100644
\r
84 --- a/encoder/analyse.c
\r
85 +++ b/encoder/analyse.c
\r
86 @@ -467,6 +467,34 @@ static void predict_4x4_mode_available( unsigned int i_neighbour,
\r
90 +/* Pre-calculate fenc satd scores for psy RD, minus DC coefficients */
\r
91 +static inline void x264_mb_cache_fenc_satd( x264_t *h )
\r
93 + DECLARE_ALIGNED_16(uint8_t zero[16]) = {0};
\r
95 + int x, y, satd_sum = 0, sa8d_sum = 0;
\r
96 + if( !h->param.analyse.i_psy_rd )
\r
98 + for( y = 0; y < 4; y++ )
\r
99 + for( x = 0; x < 4; x++ )
\r
101 + fenc = h->mb.pic.p_fenc[0]+x*4+y*4*FENC_STRIDE;
\r
102 + h->mb.pic.fenc_satd[y][x] = h->pixf.satd[PIXEL_4x4]( zero, 0, fenc, FENC_STRIDE )
\r
103 + - (h->pixf.sad[PIXEL_4x4]( zero, 0, fenc, FENC_STRIDE )>>1);
\r
104 + satd_sum += h->mb.pic.fenc_satd[y][x];
\r
106 + for( y = 0; y < 2; y++ )
\r
107 + for( x = 0; x < 2; x++ )
\r
109 + fenc = h->mb.pic.p_fenc[0]+x*8+y*8*FENC_STRIDE;
\r
110 + h->mb.pic.fenc_sa8d[y][x] = h->pixf.sa8d[PIXEL_8x8]( zero, 0, fenc, FENC_STRIDE )
\r
111 + - (h->pixf.sad[PIXEL_8x8]( zero, 0, fenc, FENC_STRIDE )>>2);
\r
112 + sa8d_sum += h->mb.pic.fenc_sa8d[y][x];
\r
114 + h->mb.pic.fenc_satd_sum = satd_sum;
\r
115 + h->mb.pic.fenc_sa8d_sum = sa8d_sum;
\r
118 static void x264_mb_analyse_intra_chroma( x264_t *h, x264_mb_analysis_t *a )
\r
121 @@ -1016,12 +1044,15 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
\r
122 assert( a->l0.me16x16.mv[1] <= h->mb.mv_max_spel[1] || h->param.i_threads == 1 );
\r
124 h->mb.i_type = P_L0;
\r
125 - if( a->b_mbrd && a->l0.me16x16.i_ref == 0
\r
126 - && *(uint32_t*)a->l0.me16x16.mv == *(uint32_t*)h->mb.cache.pskip_mv )
\r
129 - h->mb.i_partition = D_16x16;
\r
130 - x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv );
\r
131 - a->l0.i_rd16x16 = x264_rd_cost_mb( h, a->i_lambda2 );
\r
132 + x264_mb_cache_fenc_satd( h );
\r
133 + if( a->l0.me16x16.i_ref == 0 && *(uint32_t*)a->l0.me16x16.mv == *(uint32_t*)h->mb.cache.pskip_mv )
\r
135 + h->mb.i_partition = D_16x16;
\r
136 + x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv );
\r
137 + a->l0.i_rd16x16 = x264_rd_cost_mb( h, a->i_lambda2 );
\r
142 @@ -1906,7 +1937,7 @@ static void x264_mb_analyse_p_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd )
\r
144 static void x264_mb_analyse_b_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd_inter )
\r
146 - int thresh = i_satd_inter * 17/16;
\r
147 + int thresh = i_satd_inter * (17 + (!!h->param.analyse.i_psy_rd))/16;
\r
149 if( a->b_direct_available && a->i_rd16x16direct == COST_MAX )
\r
151 @@ -2046,6 +2077,21 @@ static inline void x264_mb_analyse_transform_rd( x264_t *h, x264_mb_analysis_t *
\r
155 +void x264_psy_trellis_init( x264_t *h )
\r
157 + DECLARE_ALIGNED_16( int16_t dct8x8[4][8][8] );
\r
158 + DECLARE_ALIGNED_16( int16_t dct4x4[16][4][4] );
\r
159 + DECLARE_ALIGNED_16( uint8_t zero[16*FDEC_STRIDE] ) = {0};
\r
162 + /* For psy trellis: do DCT on input data */
\r
163 + h->dctf.sub16x16_dct8( dct8x8, h->mb.pic.p_fenc[0], zero );
\r
164 + for( i = 0; i < 4; i++ )
\r
165 + h->zigzagf.scan_8x8( h->mb.pic.fenc_dct8[i], dct8x8[i] );
\r
166 + h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0], zero );
\r
167 + for( i = 0; i < 16; i++ )
\r
168 + h->zigzagf.scan_4x4( h->mb.pic.fenc_dct4[i], dct4x4[i] );
\r
171 /*****************************************************************************
\r
172 * x264_macroblock_analyse:
\r
173 @@ -2062,12 +2108,18 @@ void x264_macroblock_analyse( x264_t *h )
\r
175 x264_mb_analyse_init( h, &analysis, h->mb.i_qp );
\r
177 + if( h->param.analyse.i_psy_rd && h->param.analyse.i_trellis )
\r
178 + x264_psy_trellis_init( h );
\r
180 /*--------------------------- Do the analysis ---------------------------*/
\r
181 if( h->sh.i_type == SLICE_TYPE_I )
\r
183 x264_mb_analyse_intra( h, &analysis, COST_MAX );
\r
184 if( analysis.b_mbrd )
\r
186 + x264_mb_cache_fenc_satd( h );
\r
187 x264_intra_rd( h, &analysis, COST_MAX );
\r
190 i_cost = analysis.i_satd_i16x16;
\r
191 h->mb.i_type = I_16x16;
\r
192 @@ -2342,6 +2394,9 @@ void x264_macroblock_analyse( x264_t *h )
\r
194 int i_bskip_cost = COST_MAX;
\r
197 + if( analysis.b_mbrd )
\r
198 + x264_mb_cache_fenc_satd( h );
\r
200 h->mb.i_type = B_SKIP;
\r
201 if( h->mb.b_direct_auto_write )
\r
202 diff --git a/encoder/encoder.c b/encoder/encoder.c
\r
203 index 8f1ebac..b1d5f7b 100644
\r
204 --- a/encoder/encoder.c
\r
205 +++ b/encoder/encoder.c
\r
206 @@ -411,5 +411,6 @@ static int x264_validate_parameters( x264_t *h )
\r
207 h->param.analyse.b_fast_pskip = 0;
\r
208 h->param.analyse.i_noise_reduction = 0;
\r
209 + h->param.analyse.f_psy_rd = 0;
\r
211 if( h->param.rc.i_rc_method == X264_RC_CQP )
\r
213 @@ -482,10 +483,23 @@ static int x264_validate_parameters( x264_t *h )
\r
214 h->param.analyse.inter &= ~X264_ANALYSE_I8x8;
\r
215 h->param.analyse.intra &= ~X264_ANALYSE_I8x8;
\r
217 - h->param.analyse.i_chroma_qp_offset = x264_clip3(h->param.analyse.i_chroma_qp_offset, -12, 12);
\r
218 if( !h->param.b_cabac )
\r
219 h->param.analyse.i_trellis = 0;
\r
220 h->param.analyse.i_trellis = x264_clip3( h->param.analyse.i_trellis, 0, 2 );
\r
221 + h->param.analyse.f_psy_rd = x264_clip3f( h->param.analyse.f_psy_rd, 0, 10 );
\r
222 + if( h->param.analyse.i_subpel_refine < 6)
\r
223 + h->param.analyse.f_psy_rd = 0;
\r
224 + if( h->param.analyse.f_psy_rd )
\r
226 + h->param.analyse.i_psy_rd = FIX8( h->param.analyse.f_psy_rd );
\r
227 + /* Psy RDO increases overall quantizers to improve the quality of luma--this indirectly hurts chroma quality */
\r
228 + /* so we lower the chroma QP offset to compensate */
\r
229 + h->param.analyse.i_chroma_qp_offset -= h->param.analyse.f_psy_rd < 0.25 ? 1 : 2;
\r
230 + /* Psy trellis has a similar effect. */
\r
231 + if( h->param.analyse.i_trellis )
\r
232 + h->param.analyse.i_chroma_qp_offset -= 2;
\r
234 + h->param.analyse.i_chroma_qp_offset = x264_clip3(h->param.analyse.i_chroma_qp_offset, -12, 12);
\r
235 h->param.rc.i_aq_mode = x264_clip3( h->param.rc.i_aq_mode, 0, 2 );
\r
236 if( h->param.rc.f_aq_strength <= 0 )
\r
237 h->param.rc.i_aq_mode = 0;
\r
238 diff --git a/encoder/macroblock.c b/encoder/macroblock.c
\r
239 index 788a8ea..b429c16 100644
\r
240 --- a/encoder/macroblock.c
\r
241 +++ b/encoder/macroblock.c
\r
242 @@ -94,7 +94,7 @@ void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale )
\r
243 h->dctf.sub4x4_dct( dct4x4, p_src, p_dst );
\r
245 if( h->mb.b_trellis )
\r
246 - x264_quant_4x4_trellis( h, dct4x4, CQM_4IY, i_qscale, DCT_LUMA_4x4, 1 );
\r
247 + x264_quant_4x4_trellis( h, dct4x4, CQM_4IY, i_qscale, DCT_LUMA_4x4, 1, idx );
\r
249 h->quantf.quant_4x4( dct4x4, h->quant4_mf[CQM_4IY][i_qscale], h->quant4_bias[CQM_4IY][i_qscale] );
\r
251 @@ -121,7 +121,7 @@ void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qscale )
\r
252 h->dctf.sub8x8_dct8( dct8x8, p_src, p_dst );
\r
254 if( h->mb.b_trellis )
\r
255 - x264_quant_8x8_trellis( h, dct8x8, CQM_8IY, i_qscale, 1 );
\r
256 + x264_quant_8x8_trellis( h, dct8x8, CQM_8IY, i_qscale, 1, idx );
\r
258 h->quantf.quant_8x8( dct8x8, h->quant8_mf[CQM_8IY][i_qscale], h->quant8_bias[CQM_8IY][i_qscale] );
\r
260 @@ -163,7 +163,7 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qscale )
\r
262 /* quant/scan/dequant */
\r
263 if( h->mb.b_trellis )
\r
264 - x264_quant_4x4_trellis( h, dct4x4[i], CQM_4IY, i_qscale, DCT_LUMA_AC, 1 );
\r
265 + x264_quant_4x4_trellis( h, dct4x4[i], CQM_4IY, i_qscale, DCT_LUMA_AC, 1, i );
\r
267 h->quantf.quant_4x4( dct4x4[i], h->quant4_mf[CQM_4IY][i_qscale], h->quant4_bias[CQM_4IY][i_qscale] );
\r
269 @@ -447,7 +447,7 @@ void x264_macroblock_encode( x264_t *h )
\r
270 if( h->mb.b_noise_reduction )
\r
271 h->quantf.denoise_dct_core( *dct8x8[idx], h->nr_residual_sum[1], h->nr_offset[1], 64 );
\r
272 if( h->mb.b_trellis )
\r
273 - x264_quant_8x8_trellis( h, dct8x8[idx], CQM_8PY, i_qp, 0 );
\r
274 + x264_quant_8x8_trellis( h, dct8x8[idx], CQM_8PY, i_qp, 0, idx );
\r
276 h->quantf.quant_8x8( dct8x8[idx], h->quant8_mf[CQM_8PY][i_qp], h->quant8_bias[CQM_8PY][i_qp] );
\r
278 @@ -495,7 +495,7 @@ void x264_macroblock_encode( x264_t *h )
\r
279 if( h->mb.b_noise_reduction )
\r
280 h->quantf.denoise_dct_core( *dct4x4[idx], h->nr_residual_sum[0], h->nr_offset[0], 16 );
\r
281 if( h->mb.b_trellis )
\r
282 - x264_quant_4x4_trellis( h, dct4x4[idx], CQM_4PY, i_qp, DCT_LUMA_4x4, 0 );
\r
283 + x264_quant_4x4_trellis( h, dct4x4[idx], CQM_4PY, i_qp, DCT_LUMA_4x4, 0, idx );
\r
285 h->quantf.quant_4x4( dct4x4[idx], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] );
\r
287 diff --git a/encoder/macroblock.h b/encoder/macroblock.h
\r
288 index 49d13a2..eb97d44 100644
\r
289 --- a/encoder/macroblock.h
\r
290 +++ b/encoder/macroblock.h
\r
291 @@ -50,9 +50,9 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale );
\r
292 void x264_cabac_mb_skip( x264_t *h, int b_skip );
\r
294 void x264_quant_4x4_trellis( x264_t *h, int16_t dct[4][4], int i_quant_cat,
\r
295 - int i_qp, int i_ctxBlockCat, int b_intra );
\r
296 + int i_qp, int i_ctxBlockCat, int b_intra, int idx );
\r
297 void x264_quant_8x8_trellis( x264_t *h, int16_t dct[8][8], int i_quant_cat,
\r
298 - int i_qp, int b_intra );
\r
299 + int i_qp, int b_intra, int idx );
\r
301 void x264_noise_reduction_update( x264_t *h );
\r
303 diff --git a/encoder/rdo.c b/encoder/rdo.c
\r
304 index 76bf57b..5b23e18 100644
\r
305 --- a/encoder/rdo.c
\r
306 +++ b/encoder/rdo.c
\r
307 @@ -50,21 +50,82 @@ static uint16_t cabac_prefix_size[15][128];
\r
309 #define COPY_CABAC h->mc.memcpy_aligned( &cabac_tmp.f8_bits_encoded, &h->cabac.f8_bits_encoded, \
\r
310 sizeof(x264_cabac_t) - offsetof(x264_cabac_t,f8_bits_encoded) )
\r
312 -static int ssd_mb( x264_t *h )
\r
314 +#define ADD_ABS_SATD(satdtype, pixel)\
\r
315 + satd += abs((h->pixf.satdtype[pixel]( zero, 0, fdec, FDEC_STRIDE ) - dc_coef)\
\r
316 + - sum_##satdtype( h, pixel, x, y ));
\r
318 +/* Sum the cached SATDs to avoid repeating them. */
\r
319 +static inline int sum_satd( x264_t *h, int pixel, int x, int y )
\r
322 + int min_x = x>>2;
\r
323 + int min_y = y>>2;
\r
324 + int max_x = (x>>2) + (x264_pixel_size[pixel].w>>2);
\r
325 + int max_y = (y>>2) + (x264_pixel_size[pixel].h>>2);
\r
326 + if( pixel == PIXEL_16x16 )
\r
327 + return h->mb.pic.fenc_satd_sum;
\r
328 + for( y = min_y; y < max_y; y++ )
\r
329 + for( x = min_x; x < max_x; x++ )
\r
330 + satd += h->mb.pic.fenc_satd[y][x];
\r
334 +static inline int sum_sa8d( x264_t *h, int pixel, int x, int y )
\r
337 + int min_x = x>>3;
\r
338 + int min_y = y>>3;
\r
339 + int max_x = (x>>3) + (x264_pixel_size[pixel].w>>3);
\r
340 + int max_y = (y>>3) + (x264_pixel_size[pixel].h>>3);
\r
341 + if( pixel == PIXEL_16x16 )
\r
342 + return h->mb.pic.fenc_sa8d_sum;
\r
343 + for( y = min_y; y < max_y; y++ )
\r
344 + for( x = min_x; x < max_x; x++ )
\r
345 + sa8d += h->mb.pic.fenc_sa8d[y][x];
\r
349 +/* Psy RD distortion metric: SSD plus "Absolute Difference of Complexities" */
\r
350 +/* SATD and SA8D are used to measure block complexity. */
\r
351 +/* Blocks with a complexity most similar to that of the source are scored best. */
\r
352 +/* The SATD and SA8D score differences are both used to avoid bias from the DCT size. Using SATD */
\r
353 +/* only, for example, results in overusage of 8x8dct, while the opposite occurs when using SA8D. */
\r
354 +/* This is because frequencies stored in an 8x8dct sum up to a larger value when viewed through a 4x4 */
\r
355 +/* transform and vice versa with a 4x4dct and an 8x8 transform. */
\r
356 +/* SSD is still used as the primary RD metric; this value is merely added to it for psy purposes. */
\r
358 +/* FIXME: Is there a better metric than averaged SATD/SA8D difference for complexity difference? */
\r
359 +/* Hadamard transform is recursive, so a SATD+SA8D can be done faster by taking advantage of this fact. */
\r
360 +/* This optimization can also be used in non-RD transform decision. */
\r
362 +static inline int ssd_plane( x264_t *h, int size, int p, int x, int y )
\r
364 - return h->pixf.ssd[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE,
\r
365 - h->mb.pic.p_fdec[0], FDEC_STRIDE )
\r
366 - + h->pixf.ssd[PIXEL_8x8]( h->mb.pic.p_fenc[1], FENC_STRIDE,
\r
367 - h->mb.pic.p_fdec[1], FDEC_STRIDE )
\r
368 - + h->pixf.ssd[PIXEL_8x8]( h->mb.pic.p_fenc[2], FENC_STRIDE,
\r
369 - h->mb.pic.p_fdec[2], FDEC_STRIDE );
\r
370 + DECLARE_ALIGNED_16(uint8_t zero[16]) = {0};
\r
372 + uint8_t *fdec = h->mb.pic.p_fdec[p] + x + y*FDEC_STRIDE;
\r
373 + uint8_t *fenc = h->mb.pic.p_fenc[p] + x + y*FENC_STRIDE;
\r
374 + if( p == 0 && h->param.analyse.i_psy_rd )
\r
376 + int dc_coef = h->pixf.sad[size](zero, 0, fdec, FDEC_STRIDE) >> 1;
\r
377 + ADD_ABS_SATD(satd, size);
\r
378 + /* If the plane is smaller than 8x8, we can't do an SA8D; this probably isn't a big problem. */
\r
379 + if(size <= PIXEL_8x8)
\r
382 + ADD_ABS_SATD(sa8d, size);
\r
385 + satd = (satd * h->param.analyse.i_psy_rd * x264_lambda_tab[h->mb.i_qp] + 128) >> 8;
\r
387 + return h->pixf.ssd[size](fenc, FENC_STRIDE, fdec, FDEC_STRIDE) + satd;
\r
390 -static int ssd_plane( x264_t *h, int size, int p, int x, int y )
\r
391 +static inline int ssd_mb( x264_t *h )
\r
393 - return h->pixf.ssd[size]( h->mb.pic.p_fenc[p] + x+y*FENC_STRIDE, FENC_STRIDE,
\r
394 - h->mb.pic.p_fdec[p] + x+y*FDEC_STRIDE, FDEC_STRIDE );
\r
395 + return ssd_plane(h, PIXEL_16x16, 0, 0, 0)
\r
396 + + ssd_plane(h, PIXEL_8x8, 1, 0, 0)
\r
397 + + ssd_plane(h, PIXEL_8x8, 2, 0, 0);
\r
400 static int x264_rd_cost_mb( x264_t *h, int i_lambda2 )
\r
401 @@ -267,7 +328,7 @@ static const int lambda2_tab[2][52] = {
\r
407 int level_idx; // index into level_tree[]
\r
408 uint8_t cabac_state[10]; //just the contexts relevant to coding abs_level_m1
\r
410 @@ -296,7 +357,7 @@ typedef struct {
\r
411 static inline void quant_trellis_cabac( x264_t *h, int16_t *dct,
\r
412 const uint16_t *quant_mf, const int *unquant_mf,
\r
413 const int *coef_weight, const uint8_t *zigzag,
\r
414 - int i_ctxBlockCat, int i_lambda2, int b_ac, int i_coefs )
\r
415 + int i_ctxBlockCat, int i_lambda2, int b_ac, int i_coefs, int idx )
\r
417 int abs_coefs[64], signs[64];
\r
418 trellis_node_t nodes[2][8];
\r
419 @@ -428,8 +489,19 @@ static inline void quant_trellis_cabac( x264_t *h, int16_t *dct,
\r
420 // that are better left coded, especially at QP > 40.
\r
421 for( abs_level = q; abs_level >= q-1; abs_level-- )
\r
423 - int d = i_coef - ((unquant_mf[zigzag[i]] * abs_level + 128) >> 8);
\r
424 - uint64_t ssd = (int64_t)d*d * coef_weight[i];
\r
425 + int unquant_abs_level = ((unquant_mf[zigzag[i]] * abs_level + 128) >> 8);
\r
426 + int d = i_coef - unquant_abs_level;
\r
428 + if( h->param.analyse.i_psy_rd && i != 0 )
\r
430 + int orig_coef = (i_coefs == 64) ? h->mb.pic.fenc_dct8[idx][i] : h->mb.pic.fenc_dct4[idx][i];
\r
431 + int predicted_coef = abs(orig_coef - i_coef * signs[i] );
\r
432 + int psy_value = h->param.analyse.i_psy_rd * abs(predicted_coef + unquant_abs_level * signs[i]);
\r
433 + int psy_weight = (i_coefs == 64) ? x264_dct8_weight_tab[zigzag[i]] : x264_dct4_weight_tab[zigzag[i]];
\r
434 + ssd = (int64_t)d*d * coef_weight[i] - psy_weight * (psy_value>>2);
\r
437 + ssd = (int64_t)d*d * coef_weight[i];
\r
439 for( j = 0; j < 8; j++ )
\r
441 @@ -493,24 +565,24 @@ static inline void quant_trellis_cabac( x264_t *h, int16_t *dct,
\r
444 void x264_quant_4x4_trellis( x264_t *h, int16_t dct[4][4], int i_quant_cat,
\r
445 - int i_qp, int i_ctxBlockCat, int b_intra )
\r
446 + int i_qp, int i_ctxBlockCat, int b_intra, int idx )
\r
448 int b_ac = (i_ctxBlockCat == DCT_LUMA_AC);
\r
449 quant_trellis_cabac( h, (int16_t*)dct,
\r
450 h->quant4_mf[i_quant_cat][i_qp], h->unquant4_mf[i_quant_cat][i_qp],
\r
451 x264_dct4_weight2_zigzag[h->mb.b_interlaced],
\r
452 x264_zigzag_scan4[h->mb.b_interlaced],
\r
453 - i_ctxBlockCat, lambda2_tab[b_intra][i_qp], b_ac, 16 );
\r
454 + i_ctxBlockCat, lambda2_tab[b_intra][h->mb.i_qp], b_ac, 16, idx );
\r
458 void x264_quant_8x8_trellis( x264_t *h, int16_t dct[8][8], int i_quant_cat,
\r
459 - int i_qp, int b_intra )
\r
460 + int i_qp, int b_intra, int idx )
\r
462 quant_trellis_cabac( h, (int16_t*)dct,
\r
463 h->quant8_mf[i_quant_cat][i_qp], h->unquant8_mf[i_quant_cat][i_qp],
\r
464 x264_dct8_weight2_zigzag[h->mb.b_interlaced],
\r
465 x264_zigzag_scan8[h->mb.b_interlaced],
\r
466 - DCT_LUMA_8x8, lambda2_tab[b_intra][i_qp], 0, 64 );
\r
467 + DCT_LUMA_8x8, lambda2_tab[b_intra][h->mb.i_qp], 0, 64, idx );
\r
470 diff --git a/x264.c b/x264.c
\r
471 index 14466e5..504ca12 100644
\r
474 @@ -243,6 +243,9 @@ static void Help( x264_param_t *defaults, int b_longhelp )
\r
475 H0( " -m, --subme <integer> Subpixel motion estimation and partition\n"
\r
476 " decision quality: 1=fast, 7=best. [%d]\n", defaults->analyse.i_subpel_refine );
\r
477 H0( " --b-rdo RD based mode decision for B-frames. Requires subme 6.\n" );
\r
478 + H0( " --psy-rd Strength of mode decision psychovisual optimization [\"%.1f\"]\n"
\r
479 + " Does nothing at subme < 6.\n",
\r
480 + defaults->analyse.f_psy_rd );
\r
481 H0( " --mixed-refs Decide references on a per partition basis\n" );
\r
482 H1( " --no-chroma-me Ignore chroma in motion estimation\n" );
\r
483 H1( " --bime Jointly optimize both MVs in B-frames\n" );
\r
484 @@ -411,6 +414,7 @@ static int Parse( int argc, char **argv,
\r
485 { "mvrange", required_argument, NULL, 0 },
\r
486 { "mvrange-thread", required_argument, NULL, 0 },
\r
487 { "subme", required_argument, NULL, 'm' },
\r
488 + { "psy-rd", required_argument, NULL, 0 },
\r
489 { "b-rdo", no_argument, NULL, 0 },
\r
490 { "mixed-refs", no_argument, NULL, 0 },
\r
491 { "no-chroma-me", no_argument, NULL, 0 },
\r
492 diff --git a/x264.h b/x264.h
\r
493 index 3b678dc..02266c1 100644
\r
496 @@ -239,6 +239,8 @@ typedef struct x264_param_t
\r
497 int b_fast_pskip; /* early SKIP detection on P-frames */
\r
498 int b_dct_decimate; /* transform coefficient thresholding on P-frames */
\r
499 int i_noise_reduction; /* adaptive pseudo-deadzone */
\r
500 + float f_psy_rd; /* Psy RD strength */
\r
501 + int i_psy_rd; /* Psy RD strength--fixed point value */
\r
503 /* the deadzone size that will be used in luma quantization */
\r
504 int i_luma_deadzone[2]; /* {inter, intra} */
\r