encoder/macroblock.h

   1 /*****************************************************************************
   2  * macroblock.h: macroblock encoding
   3  *****************************************************************************
   4  * Copyright (C) 2003-2019 x264 project
   5  *
   6  * Authors: Loren Merritt <lorenm@u.washington.edu>
   7  *          Laurent Aimar <fenrir@via.ecp.fr>
   8  *
   9  * This program is free software; you can redistribute it and/or modify
  10  * it under the terms of the GNU General Public License as published by
  11  * the Free Software Foundation; either version 2 of the License, or
  12  * (at your option) any later version.
  13  *
  14  * This program is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17  * GNU General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU General Public License
  20  * along with this program; if not, write to the Free Software
  21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
  22  *
  23  * This program is also available under a commercial proprietary license.
  24  * For more information, contact us at licensing@x264.com.
  25  *****************************************************************************/
  26
  27 #ifndef X264_ENCODER_MACROBLOCK_H
  28 #define X264_ENCODER_MACROBLOCK_H
  29
  30 #include "common/macroblock.h"
  31
  32 #define x264_rdo_init x264_template(rdo_init)
  33 void x264_rdo_init( void );
  34
  35 #define x264_macroblock_probe_skip x264_template(macroblock_probe_skip)
  36 int x264_macroblock_probe_skip( x264_t *h, int b_bidir );
  37
  38 #define x264_macroblock_probe_pskip( h )\
  39     x264_macroblock_probe_skip( h, 0 )
  40 #define x264_macroblock_probe_bskip( h )\
  41     x264_macroblock_probe_skip( h, 1 )
  42
  43 #define x264_predict_lossless_4x4 x264_template(predict_lossless_4x4)
  44 void x264_predict_lossless_4x4( x264_t *h, pixel *p_dst, int p, int idx, int i_mode );
  45 #define x264_predict_lossless_8x8 x264_template(predict_lossless_8x8)
  46 void x264_predict_lossless_8x8( x264_t *h, pixel *p_dst, int p, int idx, int i_mode, pixel edge[36] );
  47 #define x264_predict_lossless_16x16 x264_template(predict_lossless_16x16)
  48 void x264_predict_lossless_16x16( x264_t *h, int p, int i_mode );
  49 #define x264_predict_lossless_chroma x264_template(predict_lossless_chroma)
  50 void x264_predict_lossless_chroma( x264_t *h, int i_mode );
  51
  52 #define x264_macroblock_encode x264_template(macroblock_encode)
  53 void x264_macroblock_encode      ( x264_t *h );
  54 #define x264_macroblock_write_cabac x264_template(macroblock_write_cabac)
  55 void x264_macroblock_write_cabac ( x264_t *h, x264_cabac_t *cb );
  56 #define x264_macroblock_write_cavlc x264_template(macroblock_write_cavlc)
  57 void x264_macroblock_write_cavlc ( x264_t *h );
  58
  59 #define x264_macroblock_encode_p8x8 x264_template(macroblock_encode_p8x8)
  60 void x264_macroblock_encode_p8x8( x264_t *h, int i8 );
  61 #define x264_macroblock_encode_p4x4 x264_template(macroblock_encode_p4x4)
  62 void x264_macroblock_encode_p4x4( x264_t *h, int i4 );
  63 #define x264_mb_encode_chroma x264_template(mb_encode_chroma)
  64 void x264_mb_encode_chroma( x264_t *h, int b_inter, int i_qp );
  65
  66 #define x264_cabac_mb_skip x264_template(cabac_mb_skip)
  67 void x264_cabac_mb_skip( x264_t *h, int b_skip );
  68 #define x264_cabac_block_residual_c x264_template(cabac_block_residual_c)
  69 void x264_cabac_block_residual_c( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l );
  70 #define x264_cabac_block_residual_8x8_rd_c x264_template(cabac_block_residual_8x8_rd_c)
  71 void x264_cabac_block_residual_8x8_rd_c( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l );
  72 #define x264_cabac_block_residual_rd_c x264_template(cabac_block_residual_rd_c)
  73 void x264_cabac_block_residual_rd_c( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l );
  74
  75 #define x264_quant_luma_dc_trellis x264_template(quant_luma_dc_trellis)
  76 int x264_quant_luma_dc_trellis( x264_t *h, dctcoef *dct, int i_quant_cat, int i_qp,
  77                                 int ctx_block_cat, int b_intra, int idx );
  78 #define x264_quant_chroma_dc_trellis x264_template(quant_chroma_dc_trellis)
  79 int x264_quant_chroma_dc_trellis( x264_t *h, dctcoef *dct, int i_qp, int b_intra, int idx );
  80 #define x264_quant_4x4_trellis x264_template(quant_4x4_trellis)
  81 int x264_quant_4x4_trellis( x264_t *h, dctcoef *dct, int i_quant_cat,
  82                              int i_qp, int ctx_block_cat, int b_intra, int b_chroma, int idx );
  83 #define x264_quant_8x8_trellis x264_template(quant_8x8_trellis)
  84 int x264_quant_8x8_trellis( x264_t *h, dctcoef *dct, int i_quant_cat,
  85                              int i_qp, int ctx_block_cat, int b_intra, int b_chroma, int idx );
  86
  87 #define x264_noise_reduction_update x264_template(noise_reduction_update)
  88 void x264_noise_reduction_update( x264_t *h );
  89
  90 static ALWAYS_INLINE int x264_quant_4x4( x264_t *h, dctcoef dct[16], int i_qp, int ctx_block_cat, int b_intra, int p, int idx )
  91 {
  92     int i_quant_cat = b_intra ? (p?CQM_4IC:CQM_4IY) : (p?CQM_4PC:CQM_4PY);
  93     if( h->mb.b_noise_reduction )
  94         h->quantf.denoise_dct( dct, h->nr_residual_sum[0+!!p*2], h->nr_offset[0+!!p*2], 16 );
  95     if( h->mb.b_trellis )
  96         return x264_quant_4x4_trellis( h, dct, i_quant_cat, i_qp, ctx_block_cat, b_intra, !!p, idx+p*16 );
  97     else
  98         return h->quantf.quant_4x4( dct, h->quant4_mf[i_quant_cat][i_qp], h->quant4_bias[i_quant_cat][i_qp] );
  99 }
 100
 101 static ALWAYS_INLINE int x264_quant_8x8( x264_t *h, dctcoef dct[64], int i_qp, int ctx_block_cat, int b_intra, int p, int idx )
 102 {
 103     int i_quant_cat = b_intra ? (p?CQM_8IC:CQM_8IY) : (p?CQM_8PC:CQM_8PY);
 104     if( h->mb.b_noise_reduction )
 105         h->quantf.denoise_dct( dct, h->nr_residual_sum[1+!!p*2], h->nr_offset[1+!!p*2], 64 );
 106     if( h->mb.b_trellis )
 107         return x264_quant_8x8_trellis( h, dct, i_quant_cat, i_qp, ctx_block_cat, b_intra, !!p, idx+p*4 );
 108     else
 109         return h->quantf.quant_8x8( dct, h->quant8_mf[i_quant_cat][i_qp], h->quant8_bias[i_quant_cat][i_qp] );
 110 }
 111
 112 #define STORE_8x8_NNZ( p, idx, nz )\
 113 do\
 114 {\
 115     M16( &h->mb.cache.non_zero_count[x264_scan8[p*16+idx*4]+0] ) = (nz) * 0x0101;\
 116     M16( &h->mb.cache.non_zero_count[x264_scan8[p*16+idx*4]+8] ) = (nz) * 0x0101;\
 117 } while( 0 )
 118
 119 #define CLEAR_16x16_NNZ( p ) \
 120 do\
 121 {\
 122     M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 0*8] ) = 0;\
 123     M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 1*8] ) = 0;\
 124     M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 2*8] ) = 0;\
 125     M32( &h->mb.cache.non_zero_count[x264_scan8[16*p] + 3*8] ) = 0;\
 126 } while( 0 )
 127
 128 /* A special for loop that iterates branchlessly over each set
 129  * bit in a 4-bit input. */
 130 #define FOREACH_BIT(idx,start,mask) for( int idx = start, msk = mask, skip; msk && (skip = x264_ctz_4bit(msk), idx += skip, msk >>= skip+1, 1); idx++ )
 131
 132 static ALWAYS_INLINE void x264_mb_encode_i4x4( x264_t *h, int p, int idx, int i_qp, int i_mode, int b_predict )
 133 {
 134     int nz;
 135     pixel *p_src = &h->mb.pic.p_fenc[p][block_idx_xy_fenc[idx]];
 136     pixel *p_dst = &h->mb.pic.p_fdec[p][block_idx_xy_fdec[idx]];
 137     ALIGNED_ARRAY_64( dctcoef, dct4x4,[16] );
 138
 139     if( b_predict )
 140     {
 141         if( h->mb.b_lossless )
 142             x264_predict_lossless_4x4( h, p_dst, p, idx, i_mode );
 143         else
 144             h->predict_4x4[i_mode]( p_dst );
 145     }
 146
 147     if( h->mb.b_lossless )
 148     {
 149         nz = h->zigzagf.sub_4x4( h->dct.luma4x4[p*16+idx], p_src, p_dst );
 150         h->mb.cache.non_zero_count[x264_scan8[p*16+idx]] = nz;
 151         h->mb.i_cbp_luma |= nz<<(idx>>2);
 152         return;
 153     }
 154
 155     h->dctf.sub4x4_dct( dct4x4, p_src, p_dst );
 156
 157     nz = x264_quant_4x4( h, dct4x4, i_qp, ctx_cat_plane[DCT_LUMA_4x4][p], 1, p, idx );
 158     h->mb.cache.non_zero_count[x264_scan8[p*16+idx]] = nz;
 159     if( nz )
 160     {
 161         h->mb.i_cbp_luma |= 1<<(idx>>2);
 162         h->zigzagf.scan_4x4( h->dct.luma4x4[p*16+idx], dct4x4 );
 163         h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[p?CQM_4IC:CQM_4IY], i_qp );
 164         h->dctf.add4x4_idct( p_dst, dct4x4 );
 165     }
 166 }
 167
 168 static ALWAYS_INLINE void x264_mb_encode_i8x8( x264_t *h, int p, int idx, int i_qp, int i_mode, pixel *edge, int b_predict )
 169 {
 170     int x = idx&1;
 171     int y = idx>>1;
 172     int nz;
 173     pixel *p_src = &h->mb.pic.p_fenc[p][8*x + 8*y*FENC_STRIDE];
 174     pixel *p_dst = &h->mb.pic.p_fdec[p][8*x + 8*y*FDEC_STRIDE];
 175     ALIGNED_ARRAY_64( dctcoef, dct8x8,[64] );
 176     ALIGNED_ARRAY_32( pixel, edge_buf,[36] );
 177
 178     if( b_predict )
 179     {
 180         if( !edge )
 181         {
 182             h->predict_8x8_filter( p_dst, edge_buf, h->mb.i_neighbour8[idx], x264_pred_i4x4_neighbors[i_mode] );
 183             edge = edge_buf;
 184         }
 185
 186         if( h->mb.b_lossless )
 187             x264_predict_lossless_8x8( h, p_dst, p, idx, i_mode, edge );
 188         else
 189             h->predict_8x8[i_mode]( p_dst, edge );
 190     }
 191
 192     if( h->mb.b_lossless )
 193     {
 194         nz = h->zigzagf.sub_8x8( h->dct.luma8x8[p*4+idx], p_src, p_dst );
 195         STORE_8x8_NNZ( p, idx, nz );
 196         h->mb.i_cbp_luma |= nz<<idx;
 197         return;
 198     }
 199
 200     h->dctf.sub8x8_dct8( dct8x8, p_src, p_dst );
 201
 202     nz = x264_quant_8x8( h, dct8x8, i_qp, ctx_cat_plane[DCT_LUMA_8x8][p], 1, p, idx );
 203     if( nz )
 204     {
 205         h->mb.i_cbp_luma |= 1<<idx;
 206         h->zigzagf.scan_8x8( h->dct.luma8x8[p*4+idx], dct8x8 );
 207         h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[p?CQM_8IC:CQM_8IY], i_qp );
 208         h->dctf.add8x8_idct8( p_dst, dct8x8 );
 209         STORE_8x8_NNZ( p, idx, 1 );
 210     }
 211     else
 212         STORE_8x8_NNZ( p, idx, 0 );
 213 }
 214
 215 #endif