/*****************************************************************************
 * quant.c: h264 encoder library
 *****************************************************************************
 * Copyright (C) 2005-2008 x264 project
 *
 * Authors: Loren Merritt <lorenm@u.washington.edu>
 *          Christian Heine <sennindemokrit@gmx.net>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *****************************************************************************/
27 #include "x86/quant.h"
30 # include "ppc/quant.h"
/* Quantize a single coefficient in place.
 *   coef - lvalue holding the coefficient; evaluated more than once, so it
 *          must not have side effects
 *   mf   - multiplier; the product is shifted right by 16 bits
 *   f    - rounding offset added to the coefficient's magnitude before scaling
 * The magnitude is scaled and the sign re-applied, so positive and negative
 * inputs round symmetrically.  Expands to a braced statement block.
 * NOTE(review): the sign test was not visible in the reviewed copy and is
 * reconstructed from the two assignments -- confirm `> 0` against the
 * reference source. */
#define QUANT_ONE( coef, mf, f ) \
{ \
    if( (coef) > 0 ) \
        (coef) = ((f) + (coef)) * (mf) >> 16; \
    else \
        (coef) = - (((f) - (coef)) * (mf) >> 16); \
}
/* Quantize an 8x8 block of coefficients in place.
 *   dct  - the 64 coefficients, walked as one flat run through dct[0][i]
 *   mf   - per-position multipliers, indexed in the same flat order
 *   bias - per-position rounding offsets, indexed in the same flat order
 * Each coefficient is processed independently by QUANT_ONE. */
static void quant_8x8( int16_t dct[8][8], uint16_t mf[64], uint16_t bias[64] )
{
    int i;
    for( i = 0; i < 64; i++ )
        QUANT_ONE( dct[0][i], mf[i], bias[i] );
}
/* Quantize a 4x4 block of coefficients in place.
 *   dct  - the 16 coefficients, walked as one flat run through dct[0][i]
 *   mf   - per-position multipliers, indexed in the same flat order
 *   bias - per-position rounding offsets, indexed in the same flat order
 * Each coefficient is processed independently by QUANT_ONE. */
static void quant_4x4( int16_t dct[4][4], uint16_t mf[16], uint16_t bias[16] )
{
    int i;
    for( i = 0; i < 16; i++ )
        QUANT_ONE( dct[0][i], mf[i], bias[i] );
}
/* Quantize a 4x4 block in place using one multiplier and one rounding offset
 * for all 16 positions (unlike quant_4x4, which takes per-position tables).
 *   dct  - the 16 coefficients, walked as one flat run through dct[0][i]
 *   mf   - multiplier applied to every coefficient
 *   bias - rounding offset applied to every coefficient */
static void quant_4x4_dc( int16_t dct[4][4], int mf, int bias )
{
    int i;
    for( i = 0; i < 16; i++ )
        QUANT_ONE( dct[0][i], mf, bias );
}
/* Quantize a 2x2 block in place using one multiplier and one rounding offset.
 * The four coefficients are addressed as dct[0][0..3], i.e. as one flat run
 * across both rows.
 *   dct  - the 4 coefficients
 *   mf   - multiplier applied to every coefficient
 *   bias - rounding offset applied to every coefficient */
static void quant_2x2_dc( int16_t dct[2][2], int mf, int bias )
{
    QUANT_ONE( dct[0][0], mf, bias );
    QUANT_ONE( dct[0][1], mf, bias );
    QUANT_ONE( dct[0][2], mf, bias );
    QUANT_ONE( dct[0][3], mf, bias );
}
/* Dequantize column x of row y.  Both macros rely on names from the expansion
 * site: dct, dequant_mf, i_mf, y and i_qbits (DEQUANT_SHR additionally needs
 * the rounding term f).
 *   DEQUANT_SHL - scale, then shift left by i_qbits (for i_qbits >= 0)
 *   DEQUANT_SHR - scale, add f, then shift right by -i_qbits (for i_qbits < 0) */
#define DEQUANT_SHL( x ) \
    dct[y][x] = ( dct[y][x] * dequant_mf[i_mf][y][x] ) << i_qbits

#define DEQUANT_SHR( x ) \
    dct[y][x] = ( dct[y][x] * dequant_mf[i_mf][y][x] + f ) >> (-i_qbits)
/* Dequantize a 4x4 block in place.
 *   dct        - coefficients to rescale
 *   dequant_mf - six 4x4 scaling tables; table i_qp%6 is used
 *   i_qp       - quantizer; i_qp/6 - 4 gives the shift applied after scaling
 * A non-negative shift is applied to the left.  A negative one is applied to
 * the right after adding half of the divisor, so the result is rounded.
 * NOTE(review): the branch on i_qbits and the per-column macro invocations
 * were not visible in the reviewed copy; they are reconstructed from the
 * DEQUANT_SHL/DEQUANT_SHR definitions and from the shift counts, which are
 * only valid on the respective side of zero. */
static void dequant_4x4( int16_t dct[4][4], int dequant_mf[6][4][4], int i_qp )
{
    const int i_mf = i_qp%6;
    const int i_qbits = i_qp/6 - 4;
    int y;

    if( i_qbits >= 0 )
    {
        for( y = 0; y < 4; y++ )
        {
            DEQUANT_SHL( 0 );
            DEQUANT_SHL( 1 );
            DEQUANT_SHL( 2 );
            DEQUANT_SHL( 3 );
        }
    }
    else
    {
        /* half of the divisor 2^(-i_qbits) */
        const int f = 1 << (-i_qbits-1);
        for( y = 0; y < 4; y++ )
        {
            DEQUANT_SHR( 0 );
            DEQUANT_SHR( 1 );
            DEQUANT_SHR( 2 );
            DEQUANT_SHR( 3 );
        }
    }
}
/* Dequantize an 8x8 block in place.
 *   dct        - coefficients to rescale
 *   dequant_mf - six 8x8 scaling tables; table i_qp%6 is used
 *   i_qp       - quantizer; i_qp/6 - 6 gives the shift applied after scaling
 * A non-negative shift is applied to the left.  A negative one is applied to
 * the right after adding half of the divisor, so the result is rounded.
 * NOTE(review): the branch on i_qbits and the per-column macro invocations
 * were not visible in the reviewed copy; they are reconstructed from the
 * DEQUANT_SHL/DEQUANT_SHR definitions and from the shift counts, which are
 * only valid on the respective side of zero. */
static void dequant_8x8( int16_t dct[8][8], int dequant_mf[6][8][8], int i_qp )
{
    const int i_mf = i_qp%6;
    const int i_qbits = i_qp/6 - 6;
    int y;

    if( i_qbits >= 0 )
    {
        for( y = 0; y < 8; y++ )
        {
            DEQUANT_SHL( 0 );
            DEQUANT_SHL( 1 );
            DEQUANT_SHL( 2 );
            DEQUANT_SHL( 3 );
            DEQUANT_SHL( 4 );
            DEQUANT_SHL( 5 );
            DEQUANT_SHL( 6 );
            DEQUANT_SHL( 7 );
        }
    }
    else
    {
        /* half of the divisor 2^(-i_qbits) */
        const int f = 1 << (-i_qbits-1);
        for( y = 0; y < 8; y++ )
        {
            DEQUANT_SHR( 0 );
            DEQUANT_SHR( 1 );
            DEQUANT_SHR( 2 );
            DEQUANT_SHR( 3 );
            DEQUANT_SHR( 4 );
            DEQUANT_SHR( 5 );
            DEQUANT_SHR( 6 );
            DEQUANT_SHR( 7 );
        }
    }
}
/* Dequantize a 4x4 block in place with a single scale factor.
 *   dct        - coefficients to rescale
 *   dequant_mf - six 4x4 scaling tables; only entry [i_qp%6][0][0] is read
 *   i_qp       - quantizer; i_qp/6 - 6 gives the shift
 * With a non-negative shift the scale factor is pre-shifted once and every
 * coefficient is multiplied by it.  With a negative shift each product gets
 * half of the divisor added and is shifted right, so the result is rounded.
 * NOTE(review): the branch on i_qbits and the body of the left-shift path
 * were not visible in the reviewed copy; they are reconstructed from the
 * pre-shifted i_dmf and from the visible right-shift path. */
static void dequant_4x4_dc( int16_t dct[4][4], int dequant_mf[6][4][4], int i_qp )
{
    const int i_qbits = i_qp/6 - 6;
    int y;

    if( i_qbits >= 0 )
    {
        const int i_dmf = dequant_mf[i_qp%6][0][0] << i_qbits;

        for( y = 0; y < 4; y++ )
        {
            dct[y][0] *= i_dmf;
            dct[y][1] *= i_dmf;
            dct[y][2] *= i_dmf;
            dct[y][3] *= i_dmf;
        }
    }
    else
    {
        const int i_dmf = dequant_mf[i_qp%6][0][0];
        /* half of the divisor 2^(-i_qbits) */
        const int f = 1 << (-i_qbits-1);

        for( y = 0; y < 4; y++ )
        {
            dct[y][0] = ( dct[y][0] * i_dmf + f ) >> (-i_qbits);
            dct[y][1] = ( dct[y][1] * i_dmf + f ) >> (-i_qbits);
            dct[y][2] = ( dct[y][2] * i_dmf + f ) >> (-i_qbits);
            dct[y][3] = ( dct[y][3] * i_dmf + f ) >> (-i_qbits);
        }
    }
}
/* Shrink coefficient magnitudes toward zero and accumulate their statistics.
 *   dct    - coefficients, modified in place; element 0 is left untouched
 *   sum    - per-position running total of magnitudes, updated in place
 *   offset - per-position amount subtracted from each magnitude
 *   size   - number of elements in all three arrays
 * A magnitude that would drop below zero is clamped to zero; otherwise the
 * original sign is restored.
 * NOTE(review): the declaration of `level` and the two statements that use
 * `sum` and `offset` were not visible in the reviewed copy.  They are
 * reconstructed from the parameter list and from the `level<0` clamp --
 * confirm against the reference source. */
static void x264_denoise_dct( int16_t *dct, uint32_t *sum, uint16_t *offset, int size )
{
    int i;
    for( i=1; i<size; i++ )
    {
        int level = dct[i];
        /* sign is 0 for non-negative values and -1 for negative ones, so
         * (level+sign)^sign is the absolute value */
        int sign = level>>15;
        level = (level+sign)^sign;
        sum[i] += level;
        level -= offset[i];
        /* (level^sign)-sign puts the sign back */
        dct[i] = level<0 ? 0 : (level^sign)-sign;
    }
}
/*
 * x264_mb_decimate_score: given dct coeffs, returns a score used to decide whether
 * the coefficients of this block can all be set to 0 (a low score means zero them).
 * Used for inter macroblocks (luma and chroma):
 *   luma:   for an 8x8 block:    if score < 4 -> null
 *           for the complete mb: if score < 6 -> null
 *   chroma: for the complete mb: if score < 7 -> null
 */
/* Decimation score contributed by one coefficient, indexed by the number of
 * zero coefficients counted after it during the backward scan (0..15).
 * Used for every block size other than 64. */
const uint8_t x264_decimate_table4[16] = {
    3,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0 };
/* Decimation score contributed by one coefficient, indexed by the number of
 * zero coefficients counted after it during the backward scan (0..63).
 * Used for 64-coefficient blocks. */
const uint8_t x264_decimate_table8[64] = {
    3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,
    1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
205 static int ALWAYS_INLINE
x264_decimate_score_internal( int16_t *dct
, int i_max
)
207 const uint8_t *ds_table
= (i_max
== 64) ? x264_decimate_table8
: x264_decimate_table4
;
211 /* Yes, dct[idx-1] is guaranteed to be 32-bit aligned. idx>=0 instead of 1 works correctly for the same reason */
212 while( idx
>= 0 && *(uint32_t*)&dct
[idx
-1] == 0 )
214 if( idx
>= 0 && dct
[idx
] == 0 )
220 if( (unsigned)(dct
[idx
--] + 1) > 2 )
224 while( idx
>= 0 && dct
[idx
] == 0 )
229 i_score
+= ds_table
[i_run
];
/* Decimation score of the 15 coefficients that follow dct[0]; the first
 * coefficient is skipped by passing dct+1. */
static int x264_decimate_score15( int16_t *dct )
{
    return x264_decimate_score_internal( dct+1, 15 );
}
/* Decimation score of the 16 coefficients starting at dct. */
static int x264_decimate_score16( int16_t *dct )
{
    return x264_decimate_score_internal( dct, 16 );
}
/* Decimation score of the 64 coefficients starting at dct. */
static int x264_decimate_score64( int16_t *dct )
{
    return x264_decimate_score_internal( dct, 64 );
}
248 static int ALWAYS_INLINE
x264_coeff_last_internal( int16_t *l
, int i_count
)
251 for( i_last
= i_count
-1; i_last
>= 3; i_last
-= 4 )
252 if( *(uint64_t*)(l
+i_last
-3) )
254 while( i_last
>= 0 && l
[i_last
] == 0 )
/* Index of the last non-zero value among the 4 coefficients at l, or -1. */
static int x264_coeff_last4( int16_t *l )
{
    return x264_coeff_last_internal( l, 4 );
}
/* Index of the last non-zero value among the 15 coefficients at l, or -1. */
static int x264_coeff_last15( int16_t *l )
{
    return x264_coeff_last_internal( l, 15 );
}
/* Index of the last non-zero value among the 16 coefficients at l, or -1. */
static int x264_coeff_last16( int16_t *l )
{
    return x264_coeff_last_internal( l, 16 );
}
/* Index of the last non-zero value among the 64 coefficients at l, or -1. */
static int x264_coeff_last64( int16_t *l )
{
    return x264_coeff_last_internal( l, 64 );
}
276 void x264_quant_init( x264_t
*h
, int cpu
, x264_quant_function_t
*pf
)
278 pf
->quant_8x8
= quant_8x8
;
279 pf
->quant_4x4
= quant_4x4
;
280 pf
->quant_4x4_dc
= quant_4x4_dc
;
281 pf
->quant_2x2_dc
= quant_2x2_dc
;
283 pf
->dequant_4x4
= dequant_4x4
;
284 pf
->dequant_4x4_dc
= dequant_4x4_dc
;
285 pf
->dequant_8x8
= dequant_8x8
;
287 pf
->denoise_dct
= x264_denoise_dct
;
288 pf
->decimate_score15
= x264_decimate_score15
;
289 pf
->decimate_score16
= x264_decimate_score16
;
290 pf
->decimate_score64
= x264_decimate_score64
;
292 pf
->coeff_last
[DCT_CHROMA_DC
] = x264_coeff_last4
;
293 pf
->coeff_last
[ DCT_LUMA_AC
] = x264_coeff_last15
;
294 pf
->coeff_last
[ DCT_LUMA_4x4
] = x264_coeff_last16
;
295 pf
->coeff_last
[ DCT_LUMA_8x8
] = x264_coeff_last64
;
298 if( cpu
&X264_CPU_MMX
)
301 pf
->quant_4x4
= x264_quant_4x4_mmx
;
302 pf
->quant_8x8
= x264_quant_8x8_mmx
;
303 pf
->dequant_4x4
= x264_dequant_4x4_mmx
;
304 pf
->dequant_4x4_dc
= x264_dequant_4x4dc_mmxext
;
305 pf
->dequant_8x8
= x264_dequant_8x8_mmx
;
306 if( h
->param
.i_cqm_preset
== X264_CQM_FLAT
)
308 pf
->dequant_4x4
= x264_dequant_4x4_flat16_mmx
;
309 pf
->dequant_8x8
= x264_dequant_8x8_flat16_mmx
;
311 pf
->denoise_dct
= x264_denoise_dct_mmx
;
315 if( cpu
&X264_CPU_MMXEXT
)
317 pf
->quant_2x2_dc
= x264_quant_2x2_dc_mmxext
;
319 pf
->quant_4x4_dc
= x264_quant_4x4_dc_mmxext
;
320 pf
->decimate_score15
= x264_decimate_score15_mmxext
;
321 pf
->decimate_score16
= x264_decimate_score16_mmxext
;
322 pf
->decimate_score64
= x264_decimate_score64_mmxext
;
323 pf
->coeff_last
[ DCT_LUMA_AC
] = x264_coeff_last15_mmxext
;
324 pf
->coeff_last
[ DCT_LUMA_4x4
] = x264_coeff_last16_mmxext
;
325 pf
->coeff_last
[ DCT_LUMA_8x8
] = x264_coeff_last64_mmxext
;
327 pf
->coeff_last
[DCT_CHROMA_DC
] = x264_coeff_last4_mmxext
;
330 if( cpu
&X264_CPU_SSE2
)
332 pf
->quant_4x4_dc
= x264_quant_4x4_dc_sse2
;
333 pf
->quant_4x4
= x264_quant_4x4_sse2
;
334 pf
->quant_8x8
= x264_quant_8x8_sse2
;
335 pf
->dequant_4x4
= x264_dequant_4x4_sse2
;
336 pf
->dequant_4x4_dc
= x264_dequant_4x4dc_sse2
;
337 pf
->dequant_8x8
= x264_dequant_8x8_sse2
;
338 if( h
->param
.i_cqm_preset
== X264_CQM_FLAT
)
340 pf
->dequant_4x4
= x264_dequant_4x4_flat16_sse2
;
341 pf
->dequant_8x8
= x264_dequant_8x8_flat16_sse2
;
343 pf
->denoise_dct
= x264_denoise_dct_sse2
;
344 pf
->decimate_score15
= x264_decimate_score15_sse2
;
345 pf
->decimate_score16
= x264_decimate_score16_sse2
;
346 pf
->decimate_score64
= x264_decimate_score64_sse2
;
347 pf
->coeff_last
[ DCT_LUMA_AC
] = x264_coeff_last15_sse2
;
348 pf
->coeff_last
[DCT_LUMA_4x4
] = x264_coeff_last16_sse2
;
349 pf
->coeff_last
[DCT_LUMA_8x8
] = x264_coeff_last64_sse2
;
352 if( cpu
&X264_CPU_SSSE3
)
354 pf
->quant_2x2_dc
= x264_quant_2x2_dc_ssse3
;
355 pf
->quant_4x4_dc
= x264_quant_4x4_dc_ssse3
;
356 pf
->quant_4x4
= x264_quant_4x4_ssse3
;
357 pf
->quant_8x8
= x264_quant_8x8_ssse3
;
358 pf
->denoise_dct
= x264_denoise_dct_ssse3
;
359 pf
->decimate_score15
= x264_decimate_score15_ssse3
;
360 pf
->decimate_score16
= x264_decimate_score16_ssse3
;
361 pf
->decimate_score64
= x264_decimate_score64_ssse3
;
366 if( cpu
&X264_CPU_ALTIVEC
) {
367 pf
->quant_2x2_dc
= x264_quant_2x2_dc_altivec
;
368 pf
->quant_4x4_dc
= x264_quant_4x4_dc_altivec
;
369 pf
->quant_4x4
= x264_quant_4x4_altivec
;
370 pf
->quant_8x8
= x264_quant_8x8_altivec
;
372 pf
->dequant_4x4
= x264_dequant_4x4_altivec
;
373 pf
->dequant_8x8
= x264_dequant_8x8_altivec
;
376 pf
->coeff_last
[ DCT_LUMA_DC
] = pf
->coeff_last
[DCT_LUMA_4x4
];
377 pf
->coeff_last
[DCT_CHROMA_AC
] = pf
->coeff_last
[ DCT_LUMA_AC
];