From 1746b2adc6a9f1e3e0f70a1a6a992f1af6851b5d Mon Sep 17 00:00:00 2001 From: Scott LaVarnway Date: Thu, 2 Aug 2012 11:58:09 -0700 Subject: [PATCH] Added row based loopfilter Interleaved loopfiltering with decode. For 1080p clips, up to 1% performance gain. For 4k clips, up to 10% seen. This patch is required for better "frame-based" multithreading. Change-Id: Ic834cf32297cc04f27e8205652fb9f70cbe290db --- vp8/common/blockd.h | 2 +- vp8/common/loopfilter.c | 108 +++++++++++++++++++++++++++++++++++++++++++++++ vp8/common/loopfilter.h | 12 ++++++ vp8/decoder/decodframe.c | 55 ++++++++++++++++++++---- vp8/decoder/onyxd_if.c | 8 ---- 5 files changed, 167 insertions(+), 18 deletions(-) diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h index acef8caa2e..f7ff577635 100644 --- a/vp8/common/blockd.h +++ b/vp8/common/blockd.h @@ -161,7 +161,7 @@ typedef struct uint8_t segment_id; /* Which set of segmentation parameters should be used for this MB */ } MB_MODE_INFO; -typedef struct +typedef struct modeinfo { MB_MODE_INFO mbmi; union b_mode_info bmi[16]; diff --git a/vp8/common/loopfilter.c b/vp8/common/loopfilter.c index b9ac0ff3e8..41b4f1214d 100644 --- a/vp8/common/loopfilter.c +++ b/vp8/common/loopfilter.c @@ -196,6 +196,114 @@ void vp8_loop_filter_frame_init(VP8_COMMON *cm, } } + +void vp8_loop_filter_row_normal(VP8_COMMON *cm, MODE_INFO *mode_info_context, + int mb_row, int post_ystride, int post_uvstride, + unsigned char *y_ptr, unsigned char *u_ptr, + unsigned char *v_ptr) +{ + int mb_col; + int filter_level; + loop_filter_info_n *lfi_n = &cm->lf_info; + loop_filter_info lfi; + FRAME_TYPE frame_type = cm->frame_type; + + for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) + { + int skip_lf = (mode_info_context->mbmi.mode != B_PRED && + mode_info_context->mbmi.mode != SPLITMV && + mode_info_context->mbmi.mb_skip_coeff); + + const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; + const int seg = mode_info_context->mbmi.segment_id; + const int ref_frame = mode_info_context->mbmi.ref_frame; + + filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; + + if (filter_level) + { + const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; + lfi.mblim = lfi_n->mblim[filter_level]; + lfi.blim = lfi_n->blim[filter_level]; + lfi.lim = lfi_n->lim[filter_level]; + lfi.hev_thr = lfi_n->hev_thr[hev_index]; + + if (mb_col > 0) + vp8_loop_filter_mbv + (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); + + if (!skip_lf) + vp8_loop_filter_bv + (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); + + /* don't apply across umv border */ + if (mb_row > 0) + vp8_loop_filter_mbh + (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); + + if (!skip_lf) + vp8_loop_filter_bh + (y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); + } + + y_ptr += 16; + u_ptr += 8; + v_ptr += 8; + + mode_info_context++; /* step to next MB */ + } + +} + +void vp8_loop_filter_row_simple(VP8_COMMON *cm, MODE_INFO *mode_info_context, + int mb_row, int post_ystride, int post_uvstride, + unsigned char *y_ptr, unsigned char *u_ptr, + unsigned char *v_ptr) +{ + int mb_col; + int filter_level; + loop_filter_info_n *lfi_n = &cm->lf_info; + + for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) + { + int skip_lf = (mode_info_context->mbmi.mode != B_PRED && + mode_info_context->mbmi.mode != SPLITMV && + mode_info_context->mbmi.mb_skip_coeff); + + const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; + const int seg = mode_info_context->mbmi.segment_id; + const int ref_frame = mode_info_context->mbmi.ref_frame; + + filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; + + if (filter_level) + { + if (mb_col > 0) + vp8_loop_filter_simple_mbv + (y_ptr, post_ystride, lfi_n->mblim[filter_level]); + + if (!skip_lf) + vp8_loop_filter_simple_bv + (y_ptr, post_ystride, lfi_n->blim[filter_level]); + + /* don't apply across umv border */ + if (mb_row > 0) + vp8_loop_filter_simple_mbh + (y_ptr, post_ystride, lfi_n->mblim[filter_level]); + + if (!skip_lf) + vp8_loop_filter_simple_bh + (y_ptr, post_ystride, lfi_n->blim[filter_level]); + } + + y_ptr += 16; + u_ptr += 8; + v_ptr += 8; + + mode_info_context++; /* step to next MB */ + } + +} void vp8_loop_filter_frame(VP8_COMMON *cm, MACROBLOCKD *mbd, int frame_type) diff --git a/vp8/common/loopfilter.h b/vp8/common/loopfilter.h index 0497271b02..b3af2d6500 100644 --- a/vp8/common/loopfilter.h +++ b/vp8/common/loopfilter.h @@ -69,6 +69,7 @@ typedef void loop_filter_uvfunction /* assorted loopfilter functions which get used elsewhere */ struct VP8Common; struct macroblockd; +struct modeinfo; void vp8_loop_filter_init(struct VP8Common *cm); @@ -90,4 +91,15 @@ void vp8_loop_filter_frame_yonly(struct VP8Common *cm, void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl); +void vp8_loop_filter_row_normal(struct VP8Common *cm, + struct modeinfo *mode_info_context, + int mb_row, int post_ystride, int post_uvstride, + unsigned char *y_ptr, unsigned char *u_ptr, + unsigned char *v_ptr); + +void vp8_loop_filter_row_simple(struct VP8Common *cm, + struct modeinfo *mode_info_context, + int mb_row, int post_ystride, int post_uvstride, + unsigned char *y_ptr, unsigned char *u_ptr, + unsigned char *v_ptr); #endif diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c index 4a33f5aef6..2d497b9404 100644 --- a/vp8/decoder/decodframe.c +++ b/vp8/decoder/decodframe.c @@ -311,6 +311,8 @@ static void decode_mb_rows(VP8D_COMP *pbi) VP8_COMMON *const pc = & pbi->common; MACROBLOCKD *const xd = & pbi->mb; + MODE_INFO *lf_mic = xd->mode_info_context; + int ibc = 0; int num_part = 1 << pc->multi_token_partition; @@ -323,6 +325,7 @@ static void decode_mb_rows(VP8D_COMP *pbi) unsigned char *ref_buffer[MAX_REF_FRAMES][3]; unsigned char *dst_buffer[3]; + unsigned char *lf_dst[3]; int i; int ref_fb_index[MAX_REF_FRAMES]; int ref_fb_corrupted[MAX_REF_FRAMES]; @@ -342,12 +345,17 @@ static void decode_mb_rows(VP8D_COMP *pbi) ref_fb_corrupted[i] = pc->yv12_fb[ref_fb_index[i]].corrupted; } - dst_buffer[0] = pc->yv12_fb[dst_fb_idx].y_buffer; - dst_buffer[1] = pc->yv12_fb[dst_fb_idx].u_buffer; - dst_buffer[2] = pc->yv12_fb[dst_fb_idx].v_buffer; + /* Set up the buffer pointers */ + lf_dst[0] = dst_buffer[0] = pc->yv12_fb[dst_fb_idx].y_buffer; + lf_dst[1] = dst_buffer[1] = pc->yv12_fb[dst_fb_idx].u_buffer; + lf_dst[2] = dst_buffer[2] = pc->yv12_fb[dst_fb_idx].v_buffer; xd->up_available = 0; + /* Initialize the loop filter for this frame. */ + if(pc->filter_level) + vp8_loop_filter_frame_init(pc, xd, pc->filter_level); + /* Decode the individual macro block */ for (mb_row = 0; mb_row < pc->mb_rows; mb_row++) { @@ -449,26 +457,55 @@ static void decode_mb_rows(VP8D_COMP *pbi) xd->recon_left[1] += 8; xd->recon_left[2] += 8; - recon_yoffset += 16; recon_uvoffset += 8; ++xd->mode_info_context; /* next mb */ xd->above_context++; - } /* adjust to the next row of mbs */ - vp8_extend_mb_row( - &pc->yv12_fb[dst_fb_idx], - xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8 - ); + vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, + xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); ++xd->mode_info_context; /* skip prediction column */ xd->up_available = 1; + if(pc->filter_level) + { + if(mb_row > 0) + { + if (pc->filter_type == NORMAL_LOOPFILTER) + vp8_loop_filter_row_normal(pc, lf_mic, mb_row-1, + recon_y_stride, recon_uv_stride, + lf_dst[0], lf_dst[1], lf_dst[2]); + else + vp8_loop_filter_row_simple(pc, lf_mic, mb_row-1, + recon_y_stride, recon_uv_stride, + lf_dst[0], lf_dst[1], lf_dst[2]); + lf_dst[0] += recon_y_stride * 16; + lf_dst[1] += recon_uv_stride * 8; + lf_dst[2] += recon_uv_stride * 8; + lf_mic += pc->mb_cols; + lf_mic++; /* Skip border mb */ + } + } + } + + if(pc->filter_level) + { + if (pc->filter_type == NORMAL_LOOPFILTER) + vp8_loop_filter_row_normal(pc, lf_mic, mb_row-1, recon_y_stride, + recon_uv_stride, lf_dst[0], lf_dst[1], + lf_dst[2]); + else + vp8_loop_filter_row_simple(pc, lf_mic, mb_row-1, recon_y_stride, + recon_uv_stride, lf_dst[0], lf_dst[1], + lf_dst[2]); } + + vp8_yv12_extend_frame_borders(&pc->yv12_fb[dst_fb_idx]); } static unsigned int read_partition_size(const unsigned char *cx_size) diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c index 1c67e71d75..6cb7be6bf2 100644 --- a/vp8/decoder/onyxd_if.c +++ b/vp8/decoder/onyxd_if.c @@ -467,16 +467,8 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, unsigned long size, const unsi pbi->num_fragments = 0; return -1; } - - if(cm->filter_level) - { - /* Apply the loop filter if appropriate. */ - vp8_loop_filter_frame(cm, &pbi->mb, cm->frame_type); - } - vp8_yv12_extend_frame_borders(cm->frame_to_show); } - vp8_clear_system_state(); #if CONFIG_ERROR_CONCEALMENT -- 2.11.4.GIT