av1_convolve_ x,y _avx2() -- use 256 bit load/store
[aom.git] / test / av1_convolve_optimz_test.cc
blob0d1f30fcce83392f4959731038cb966203b84d83
1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
12 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
14 #include "./av1_rtcd.h"
15 #include "test/acm_random.h"
16 #include "test/clear_system_state.h"
17 #include "test/register_state_check.h"
18 #include "test/util.h"
20 namespace {
22 using std::tr1::tuple;
23 using libaom_test::ACMRandom;
// Initialisation hook supplied as the first test parameter; each fixture's
// SetUp() calls it once before storing the filter functions under test.
25 typedef void (*ConvInit)();
// Signature of the 8-bit convolve functions. As called in this file the
// arguments are: src, src_stride, dst, dst_stride, width, height,
// filter_params, subpel, step (x_step_q4), conv_params.
26 typedef void (*conv_filter_t)(const uint8_t *, int, uint8_t *, int, int, int,
27 const InterpFilterParams, int, int,
28 ConvolveParams *);
// Signature of the high bit-depth convolve functions. As called in this file
// the arguments are: src, src_stride, dst, dst_stride, width, height,
// filter_params, subpel, step (x_step_q4), avg, bit_depth.
29 typedef void (*hbd_conv_filter_t)(const uint16_t *, int, uint16_t *, int, int,
30 int, const InterpFilterParams, int, int, int,
31 int);
32 // Test parameter list (8-bit fixture), in tuple order:
33 // <conv_init_func, convolve_horiz_func, convolve_vert_func,
34 // <width, height>, filter, subpel_x_q4, avg>
// The fixture reads the last element as `ref` and forwards it to
// get_conv_params().
35 typedef tuple<int, int> BlockDimension;
36 typedef tuple<ConvInit, conv_filter_t, conv_filter_t, BlockDimension,
37 InterpFilter, int, int>
38 ConvParams;
39 // Test parameter list (high bit-depth fixture), in tuple order:
40 // <conv_init_func, convolve_horiz_func, convolve_vert_func,
41 // <width, height>, filter, subpel_x_q4, avg, bit_depth>
42 typedef tuple<ConvInit, hbd_conv_filter_t, hbd_conv_filter_t, BlockDimension,
43 InterpFilter, int, int, int>
44 HbdConvParams;
46 // Note:
47 // src_ and src_ref_ have a special boundary requirement: SetUp() places them
48 // vertiOffset rows (of maxWidth elements) plus horizOffset elements into
// their regions. dst_ and dst_ref_ don't; they start at a region boundary.
// Each of the four regions in the fixtures' allocation holds maxBlockSize
// elements.
49 const size_t maxWidth = 256;
50 const size_t maxHeight = 256;
51 const size_t maxBlockSize = maxWidth * maxHeight;
// Margin, in elements / rows, left in front of the source block.
52 const int horizOffset = 32;
53 const int vertiOffset = 32;
// Row stride used for every buffer when filling, filtering and comparing.
// Note that it is smaller than maxWidth, which is what SetUp() uses for the
// vertical margin.
54 const int stride = 128;
// Step argument passed to every convolve call in this file.
// NOTE(review): presumably one full pixel in 1/16-pel units - confirm.
55 const int x_step_q4 = 16;
// Fixture for the 8-bit convolve tests. Each test runs the function under
// test and the C reference (av1_convolve_horiz_c / av1_convolve_vert_c) on
// identical input and expects identical output.
57 class AV1ConvolveOptimzTest : public ::testing::TestWithParam<ConvParams> {
58 public:
59 virtual ~AV1ConvolveOptimzTest() {}
// Unpacks the test parameters (see ConvParams) and carves the four working
// buffers out of a single allocation.
60 virtual void SetUp() {
61 ConvInit conv_init = GET_PARAM(0);
// Run the supplied init hook before the filter functions are used.
62 conv_init();
63 conv_horiz_ = GET_PARAM(1);
64 conv_vert_ = GET_PARAM(2);
65 BlockDimension block = GET_PARAM(3);
66 width_ = std::tr1::get<0>(block);
67 height_ = std::tr1::get<1>(block);
68 filter_ = GET_PARAM(4);
69 subpel_ = GET_PARAM(5);
// The final tuple element is passed twice to get_conv_params().
70 int ref = GET_PARAM(6);
71 const int plane = 0;
72 conv_params_ = get_conv_params(ref, ref, plane);
// One allocation of 4 * maxBlockSize elements, used as four regions:
//   region 0: src_      (offset by the vertical and horizontal margins)
//   region 1: src_ref_  (same offset as src_, one region further on)
//   region 2: dst_      (starts at the region boundary)
//   region 3: dst_ref_  (starts at the region boundary)
74 alloc_ = new uint8_t[maxBlockSize * 4];
75 src_ = alloc_ + (vertiOffset * maxWidth);
76 src_ += horizOffset;
77 src_ref_ = src_ + maxBlockSize;
79 dst_ = alloc_ + 2 * maxBlockSize;
80 dst_ref_ = alloc_ + 3 * maxBlockSize;
// Releases the allocation made in SetUp() and calls
// libaom_test::ClearSystemState().
83 virtual void TearDown() {
84 delete[] alloc_;
85 libaom_test::ClearSystemState();
88 protected:
// Entry points used by the TEST_P bodies.
89 void RunHorizFilterBitExactCheck();
90 void RunVertFilterBitExactCheck();
92 private:
// Fills the source/destination buffers before a run.
93 void PrepFilterBuffer();
// Compares dst_ against dst_ref_ after a run.
94 void DiffFilterBuffer();
// Functions under test.
95 conv_filter_t conv_horiz_;
96 conv_filter_t conv_vert_;
// Owning pointer for the single allocation; the four pointers below point
// into it and must not be freed individually.
97 uint8_t *alloc_;
98 uint8_t *src_;
99 uint8_t *dst_;
100 uint8_t *src_ref_;
101 uint8_t *dst_ref_;
// Block dimensions, filter selector and sub-pixel position for this case.
102 int width_;
103 int height_;
104 InterpFilter filter_;
105 int subpel_;
// Passed by address to both the reference and the function under test.
106 ConvolveParams conv_params_;
// Zeroes the whole allocation, then fills the width_ x height_ block of src_
// and dst_ with values from a deterministically seeded ACMRandom and copies
// the same values into src_ref_ and dst_ref_, so that the reference and the
// function under test start from identical buffers.
109 void AV1ConvolveOptimzTest::PrepFilterBuffer() {
110 int r, c;
111 ACMRandom rnd(ACMRandom::DeterministicSeed());
// Everything outside the filled block, including the margins, stays zero.
113 memset(alloc_, 0, 4 * maxBlockSize * sizeof(alloc_[0]));
115 uint8_t *src_ptr = src_;
116 uint8_t *dst_ptr = dst_;
117 uint8_t *src_ref_ptr = src_ref_;
118 uint8_t *dst_ref_ptr = dst_ref_;
120 for (r = 0; r < height_; ++r) {
121 for (c = 0; c < width_; ++c) {
122 src_ptr[c] = rnd.Rand8();
123 src_ref_ptr[c] = src_ptr[c];
// dst is pre-filled with random data too, mirrored into dst_ref.
124 dst_ptr[c] = rnd.Rand8();
125 dst_ref_ptr[c] = dst_ptr[c];
// Advance all four row pointers by the shared stride.
127 src_ptr += stride;
128 src_ref_ptr += stride;
129 dst_ptr += stride;
130 dst_ref_ptr += stride;
// Compares dst_ with dst_ref_ element by element over the width_ x height_
// block. EXPECT_EQ is used, so every mismatch is reported together with its
// position and the test parameters, and the comparison continues.
134 void AV1ConvolveOptimzTest::DiffFilterBuffer() {
135 int r, c;
136 const uint8_t *dst_ptr = dst_;
137 const uint8_t *dst_ref_ptr = dst_ref_;
138 for (r = 0; r < height_; ++r) {
139 for (c = 0; c < width_; ++c) {
140 EXPECT_EQ((uint8_t)dst_ref_ptr[c], (uint8_t)dst_ptr[c])
141 << "Error at row: " << r << " col: " << c << " "
142 << "w = " << width_ << " "
143 << "h = " << height_ << " "
// The message prints filter_ as "filter group index" and subpel_ as
// "filter index".
144 << "filter group index = " << filter_ << " "
145 << "filter index = " << subpel_;
147 dst_ptr += stride;
148 dst_ref_ptr += stride;
// Runs the horizontal filter twice (once with height_, once with a derived
// intermediate height), each time comparing the function under test against
// av1_convolve_horiz_c.
152 void AV1ConvolveOptimzTest::RunHorizFilterBitExactCheck() {
153 PrepFilterBuffer();
155 InterpFilterParams filter_params = av1_get_interp_filter_params(filter_);
// Reference output goes to dst_ref_ (reading src_ref_), output of the
// function under test goes to dst_ (reading src_).
157 av1_convolve_horiz_c(src_ref_, stride, dst_ref_, stride, width_, height_,
158 filter_params, subpel_, x_step_q4, &conv_params_);
160 conv_horiz_(src_, stride, dst_, stride, width_, height_, filter_params,
161 subpel_, x_step_q4, &conv_params_);
163 DiffFilterBuffer();
165 // Note:
166 // Here we need calculate a height which is different from the specified one
167 // and test again.
// NOTE(review): PrepFilterBuffer() and DiffFilterBuffer() both iterate over
// height_ rows only, so if intermediate_height exceeds height_ the extra rows
// are filtered from zeroed input and are not compared - confirm this is
// intended.
168 int intermediate_height =
169 (((height_ - 1) * 16 + subpel_) >> SUBPEL_BITS) + filter_params.taps;
// Reset all buffers so the second pass starts from the same state.
170 PrepFilterBuffer();
172 av1_convolve_horiz_c(src_ref_, stride, dst_ref_, stride, width_,
173 intermediate_height, filter_params, subpel_, x_step_q4,
174 &conv_params_);
176 conv_horiz_(src_, stride, dst_, stride, width_, intermediate_height,
177 filter_params, subpel_, x_step_q4, &conv_params_);
179 DiffFilterBuffer();
// Runs the vertical filter once and compares the function under test against
// av1_convolve_vert_c.
182 void AV1ConvolveOptimzTest::RunVertFilterBitExactCheck() {
183 PrepFilterBuffer();
185 InterpFilterParams filter_params = av1_get_interp_filter_params(filter_);
// Reference reads src_ref_ and writes dst_ref_; the function under test
// reads src_ and writes dst_.
187 av1_convolve_vert_c(src_ref_, stride, dst_ref_, stride, width_, height_,
188 filter_params, subpel_, x_step_q4, &conv_params_);
190 conv_vert_(src_, stride, dst_, stride, width_, height_, filter_params,
191 subpel_, x_step_q4, &conv_params_);
193 DiffFilterBuffer();
// Parameterised test bodies for the 8-bit fixture; each one delegates to the
// corresponding fixture method.
196 TEST_P(AV1ConvolveOptimzTest, HorizBitExactCheck) {
197 RunHorizFilterBitExactCheck();
199 TEST_P(AV1ConvolveOptimzTest, VerticalBitExactCheck) {
200 RunVertFilterBitExactCheck();
203 using std::tr1::make_tuple;
// Parameter value tables shared by the SSSE3 and SSE4_1 instantiations; only
// compiled when at least one of them is enabled together with
// CONFIG_DUAL_FILTER.
205 #if (HAVE_SSSE3 || HAVE_SSE4_1) && CONFIG_DUAL_FILTER
// <width, height> pairs, from 2x2 up to 128x128.
206 const BlockDimension kBlockDim[] = {
207 make_tuple(2, 2), make_tuple(2, 4), make_tuple(4, 4),
208 make_tuple(4, 8), make_tuple(8, 4), make_tuple(8, 8),
209 make_tuple(8, 16), make_tuple(16, 8), make_tuple(16, 16),
210 make_tuple(16, 32), make_tuple(32, 16), make_tuple(32, 32),
211 make_tuple(32, 64), make_tuple(64, 32), make_tuple(64, 64),
212 make_tuple(64, 128), make_tuple(128, 64), make_tuple(128, 128),
215 // 10/12-tap filters
// Filter selectors handed to av1_get_interp_filter_params() by the fixtures.
216 const InterpFilter kFilter[] = { EIGHTTAP_REGULAR, BILINEAR, MULTITAP_SHARP };
// Sub-pixel positions 1..15 (0 is not included).
218 const int kSubpelQ4[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
// Values for the final "avg" tuple element.
220 const int kAvg[] = { 0, 1 };
221 #endif
// Instantiates the 8-bit fixture for the SSSE3 functions over every
// combination (::testing::Combine) of block size, filter, sub-pixel position
// and avg value.
223 #if HAVE_SSSE3 && CONFIG_DUAL_FILTER
224 INSTANTIATE_TEST_CASE_P(
225 SSSE3, AV1ConvolveOptimzTest,
226 ::testing::Combine(::testing::Values(av1_lowbd_convolve_init_ssse3),
227 ::testing::Values(av1_convolve_horiz_ssse3),
228 ::testing::Values(av1_convolve_vert_ssse3),
229 ::testing::ValuesIn(kBlockDim),
230 ::testing::ValuesIn(kFilter),
231 ::testing::ValuesIn(kSubpelQ4),
232 ::testing::ValuesIn(kAvg)));
233 #endif // HAVE_SSSE3 && CONFIG_DUAL_FILTER
// Fixture for the high bit-depth (uint16_t) convolve tests. Each test runs
// the function under test and the C reference (av1_highbd_convolve_horiz_c /
// av1_highbd_convolve_vert_c) and expects identical output.
235 typedef ::testing::TestWithParam<HbdConvParams> TestWithHbdConvParams;
236 class AV1HbdConvolveOptimzTest : public TestWithHbdConvParams {
237 public:
238 virtual ~AV1HbdConvolveOptimzTest() {}
// Unpacks the test parameters (see HbdConvParams) and carves the working
// buffers out of a single allocation.
239 virtual void SetUp() {
240 ConvInit conv_init = GET_PARAM(0);
// Run the supplied init hook before the filter functions are used.
241 conv_init();
242 conv_horiz_ = GET_PARAM(1);
243 conv_vert_ = GET_PARAM(2);
244 BlockDimension block = GET_PARAM(3);
245 width_ = std::tr1::get<0>(block);
246 height_ = std::tr1::get<1>(block);
247 filter_ = GET_PARAM(4);
248 subpel_ = GET_PARAM(5);
249 avg_ = GET_PARAM(6);
250 bit_depth_ = GET_PARAM(7);
// One allocation of 4 * maxBlockSize elements, used as four regions:
//   region 0: src_      (offset by the vertical and horizontal margins)
//   region 1: src_ref_  (same offset as src_, one region further on)
//   region 2: dst_      (starts at the region boundary)
//   region 3: dst_ref_  (starts at the region boundary)
252 alloc_ = new uint16_t[maxBlockSize * 4];
253 src_ = alloc_ + (vertiOffset * maxWidth);
254 src_ += horizOffset;
// NOTE(review): src_ref_ is assigned here but none of the methods visible in
// this file read it; both the reference and the function under test are
// given src_.
255 src_ref_ = src_ + maxBlockSize;
257 dst_ = alloc_ + 2 * maxBlockSize;
258 dst_ref_ = alloc_ + 3 * maxBlockSize;
// Releases the allocation made in SetUp() and calls
// libaom_test::ClearSystemState().
261 virtual void TearDown() {
262 delete[] alloc_;
263 libaom_test::ClearSystemState();
266 protected:
// Entry points used by the TEST_P bodies.
267 void RunHorizFilterBitExactCheck();
268 void RunVertFilterBitExactCheck();
270 private:
// Fills the source/destination buffers before a run.
271 void PrepFilterBuffer();
// Compares dst_ against dst_ref_ after a run.
272 void DiffFilterBuffer();
// Functions under test.
273 hbd_conv_filter_t conv_horiz_;
274 hbd_conv_filter_t conv_vert_;
// Owning pointer for the single allocation; the four pointers below point
// into it and must not be freed individually.
275 uint16_t *alloc_;
276 uint16_t *src_;
277 uint16_t *dst_;
278 uint16_t *src_ref_;
279 uint16_t *dst_ref_;
// Block dimensions, filter selector and sub-pixel position for this case.
280 int width_;
281 int height_;
282 InterpFilter filter_;
283 int subpel_;
// Passed through unchanged as the last two arguments of every convolve call.
284 int avg_;
285 int bit_depth_;
// Zeroes the whole allocation, then fills the width_ x height_ block of src_
// and dst_ with random values limited to bit_depth_ bits (deterministic
// seed) and copies the dst_ values into dst_ref_. src_ref_ is not filled
// here; the reference call reads src_ as well.
288 void AV1HbdConvolveOptimzTest::PrepFilterBuffer() {
289 int r, c;
290 ACMRandom rnd(ACMRandom::DeterministicSeed());
// Everything outside the filled block, including the margins, stays zero.
292 memset(alloc_, 0, 4 * maxBlockSize * sizeof(alloc_[0]));
294 uint16_t *src_ptr = src_;
295 uint16_t *dst_ptr = dst_;
296 uint16_t *dst_ref_ptr = dst_ref_;
// Mask that keeps only the low bit_depth_ bits of each random sample.
297 uint16_t hbd_mask = (1 << bit_depth_) - 1;
299 for (r = 0; r < height_; ++r) {
300 for (c = 0; c < width_; ++c) {
301 src_ptr[c] = rnd.Rand16() & hbd_mask;
302 dst_ptr[c] = rnd.Rand16() & hbd_mask;
303 dst_ref_ptr[c] = dst_ptr[c];
// Advance the row pointers by the shared stride.
305 src_ptr += stride;
306 dst_ptr += stride;
307 dst_ref_ptr += stride;
// Compares dst_ with dst_ref_ element by element over the width_ x height_
// block. EXPECT_EQ is used, so every mismatch is reported together with its
// position and the test parameters, and the comparison continues.
311 void AV1HbdConvolveOptimzTest::DiffFilterBuffer() {
312 int r, c;
313 const uint16_t *dst_ptr = dst_;
314 const uint16_t *dst_ref_ptr = dst_ref_;
315 for (r = 0; r < height_; ++r) {
316 for (c = 0; c < width_; ++c) {
317 EXPECT_EQ((uint16_t)dst_ref_ptr[c], (uint16_t)dst_ptr[c])
318 << "Error at row: " << r << " col: " << c << " "
319 << "w = " << width_ << " "
320 << "h = " << height_ << " "
// The message prints filter_ as "filter group index" and subpel_ as
// "filter index".
321 << "filter group index = " << filter_ << " "
322 << "filter index = " << subpel_ << " "
323 << "bit depth = " << bit_depth_;
325 dst_ptr += stride;
326 dst_ref_ptr += stride;
// Runs the high bit-depth horizontal filter twice (once with height_, once
// with a derived intermediate height), each time comparing the function
// under test against av1_highbd_convolve_horiz_c.
330 void AV1HbdConvolveOptimzTest::RunHorizFilterBitExactCheck() {
331 PrepFilterBuffer();
333 InterpFilterParams filter_params = av1_get_interp_filter_params(filter_);
// Both calls read the same src_; the reference writes dst_ref_ and the
// function under test writes dst_.
335 av1_highbd_convolve_horiz_c(src_, stride, dst_ref_, stride, width_, height_,
336 filter_params, subpel_, x_step_q4, avg_,
337 bit_depth_);
339 conv_horiz_(src_, stride, dst_, stride, width_, height_, filter_params,
340 subpel_, x_step_q4, avg_, bit_depth_);
342 DiffFilterBuffer();
344 // Note:
345 // Here we need calculate a height which is different from the specified one
346 // and test again.
// NOTE(review): PrepFilterBuffer() and DiffFilterBuffer() both iterate over
// height_ rows only, so if intermediate_height exceeds height_ the extra rows
// are filtered from zeroed input and are not compared - confirm this is
// intended.
347 int intermediate_height =
348 (((height_ - 1) * 16 + subpel_) >> SUBPEL_BITS) + filter_params.taps;
// Reset all buffers so the second pass starts from the same state.
349 PrepFilterBuffer();
351 av1_highbd_convolve_horiz_c(src_, stride, dst_ref_, stride, width_,
352 intermediate_height, filter_params, subpel_,
353 x_step_q4, avg_, bit_depth_);
355 conv_horiz_(src_, stride, dst_, stride, width_, intermediate_height,
356 filter_params, subpel_, x_step_q4, avg_, bit_depth_);
358 DiffFilterBuffer();
// Runs the high bit-depth vertical filter once and compares the function
// under test against av1_highbd_convolve_vert_c.
361 void AV1HbdConvolveOptimzTest::RunVertFilterBitExactCheck() {
362 PrepFilterBuffer();
364 InterpFilterParams filter_params = av1_get_interp_filter_params(filter_);
// Both calls read the same src_; the reference writes dst_ref_ and the
// function under test writes dst_.
366 av1_highbd_convolve_vert_c(src_, stride, dst_ref_, stride, width_, height_,
367 filter_params, subpel_, x_step_q4, avg_,
368 bit_depth_);
370 conv_vert_(src_, stride, dst_, stride, width_, height_, filter_params,
371 subpel_, x_step_q4, avg_, bit_depth_);
373 DiffFilterBuffer();
// Parameterised test bodies for the high bit-depth fixture; each one
// delegates to the corresponding fixture method.
376 TEST_P(AV1HbdConvolveOptimzTest, HorizBitExactCheck) {
377 RunHorizFilterBitExactCheck();
379 TEST_P(AV1HbdConvolveOptimzTest, VertBitExactCheck) {
380 RunVertFilterBitExactCheck();
// Instantiates the high bit-depth fixture for the SSE4.1 functions over every
// combination (::testing::Combine) of block size, filter, sub-pixel
// position, avg value and bit depth.
383 #if HAVE_SSE4_1 && CONFIG_DUAL_FILTER
// Bit depths exercised by the high bit-depth tests.
385 const int kBitdepth[] = { 10, 12 };
387 INSTANTIATE_TEST_CASE_P(
388 SSE4_1, AV1HbdConvolveOptimzTest,
389 ::testing::Combine(::testing::Values(av1_highbd_convolve_init_sse4_1),
390 ::testing::Values(av1_highbd_convolve_horiz_sse4_1),
391 ::testing::Values(av1_highbd_convolve_vert_sse4_1),
392 ::testing::ValuesIn(kBlockDim),
393 ::testing::ValuesIn(kFilter),
394 ::testing::ValuesIn(kSubpelQ4),
395 ::testing::ValuesIn(kAvg),
396 ::testing::ValuesIn(kBitdepth)));
397 #endif // HAVE_SSE4_1 && CONFIG_DUAL_FILTER
398 } // namespace