Add dr prediction test
[aom.git] / test / selfguided_filter_test.cc
blob4506a90dbd601a7f96f6fed3fd23c1bbd72bf442
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
12 #include <ctime>
14 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
16 #include "config/av1_rtcd.h"
18 #include "test/acm_random.h"
19 #include "test/clear_system_state.h"
20 #include "test/register_state_check.h"
21 #include "test/util.h"
23 #include "aom_ports/aom_timer.h"
24 #include "av1/common/mv.h"
25 #include "av1/common/restoration.h"
27 namespace {
29 using ::testing::make_tuple;
30 using ::testing::tuple;
31 using libaom_test::ACMRandom;
// Pointer to a self-guided restoration routine. Both the C reference and the
// SIMD variants exercised in this file are called through this signature;
// `highbd` is passed as 0 by the low bit depth tests and as 1 by the high
// bit depth tests.
typedef void (*SgrFunc)(const uint8_t *dat8, int width, int height,
                        int stride, int eps, const int *xqd, uint8_t *dst8,
                        int dst_stride, int32_t *tmpbuf, int bit_depth,
                        int highbd);
37 // Test parameter list:
38 // <tst_fun_>
39 typedef tuple<SgrFunc> FilterTestParam;
41 class AV1SelfguidedFilterTest
42 : public ::testing::TestWithParam<FilterTestParam> {
43 public:
44 virtual ~AV1SelfguidedFilterTest() {}
45 virtual void SetUp() {}
47 virtual void TearDown() { libaom_test::ClearSystemState(); }
49 protected:
50 void RunSpeedTest() {
51 tst_fun_ = GET_PARAM(0);
52 const int pu_width = RESTORATION_PROC_UNIT_SIZE;
53 const int pu_height = RESTORATION_PROC_UNIT_SIZE;
54 const int width = 256, height = 256, stride = 288, out_stride = 288;
55 const int NUM_ITERS = 2000;
56 int i, j, k;
58 uint8_t *input_ =
59 (uint8_t *)aom_memalign(32, stride * (height + 32) * sizeof(uint8_t));
60 uint8_t *output_ = (uint8_t *)aom_memalign(
61 32, out_stride * (height + 32) * sizeof(uint8_t));
62 int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE);
63 uint8_t *input = input_ + stride * 16 + 16;
64 uint8_t *output = output_ + out_stride * 16 + 16;
66 ACMRandom rnd(ACMRandom::DeterministicSeed());
68 for (i = -16; i < height + 16; ++i)
69 for (j = -16; j < width + 16; ++j)
70 input[i * stride + j] = rnd.Rand16() & 0xFF;
72 int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 -
73 SGRPROJ_PRJ_MIN0),
74 SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 -
75 SGRPROJ_PRJ_MIN1) };
76 // Fix a parameter set, since the speed depends slightly on r.
77 // Change this to test different combinations of values of r.
78 int eps = 15;
80 av1_loop_restoration_precal();
82 aom_usec_timer ref_timer;
83 aom_usec_timer_start(&ref_timer);
84 for (i = 0; i < NUM_ITERS; ++i) {
85 for (k = 0; k < height; k += pu_height)
86 for (j = 0; j < width; j += pu_width) {
87 int w = AOMMIN(pu_width, width - j);
88 int h = AOMMIN(pu_height, height - k);
89 uint8_t *input_p = input + k * stride + j;
90 uint8_t *output_p = output + k * out_stride + j;
91 apply_selfguided_restoration_c(input_p, w, h, stride, eps, xqd,
92 output_p, out_stride, tmpbuf, 8, 0);
95 aom_usec_timer_mark(&ref_timer);
96 const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
98 aom_usec_timer tst_timer;
99 aom_usec_timer_start(&tst_timer);
100 for (i = 0; i < NUM_ITERS; ++i) {
101 for (k = 0; k < height; k += pu_height)
102 for (j = 0; j < width; j += pu_width) {
103 int w = AOMMIN(pu_width, width - j);
104 int h = AOMMIN(pu_height, height - k);
105 uint8_t *input_p = input + k * stride + j;
106 uint8_t *output_p = output + k * out_stride + j;
107 tst_fun_(input_p, w, h, stride, eps, xqd, output_p, out_stride,
108 tmpbuf, 8, 0);
111 aom_usec_timer_mark(&tst_timer);
112 const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
114 std::cout << "[ ] C time = " << ref_time / 1000
115 << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
117 EXPECT_GT(ref_time, tst_time)
118 << "Error: AV1SelfguidedFilterTest.SpeedTest, SIMD slower than C.\n"
119 << "C time: " << ref_time << " us\n"
120 << "SIMD time: " << tst_time << " us\n";
122 aom_free(input_);
123 aom_free(output_);
124 aom_free(tmpbuf);
127 void RunCorrectnessTest() {
128 tst_fun_ = GET_PARAM(0);
129 const int pu_width = RESTORATION_PROC_UNIT_SIZE;
130 const int pu_height = RESTORATION_PROC_UNIT_SIZE;
131 // Set the maximum width/height to test here. We actually test a small
132 // range of sizes *up to* this size, so that we can check, eg.,
133 // the behaviour on tiles which are not a multiple of 4 wide.
134 const int max_w = 260, max_h = 260, stride = 672, out_stride = 672;
135 const int NUM_ITERS = 81;
136 int i, j, k;
138 uint8_t *input_ =
139 (uint8_t *)aom_memalign(32, stride * (max_h + 32) * sizeof(uint8_t));
140 uint8_t *output_ = (uint8_t *)aom_memalign(
141 32, out_stride * (max_h + 32) * sizeof(uint8_t));
142 uint8_t *output2_ = (uint8_t *)aom_memalign(
143 32, out_stride * (max_h + 32) * sizeof(uint8_t));
144 int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE);
146 uint8_t *input = input_ + stride * 16 + 16;
147 uint8_t *output = output_ + out_stride * 16 + 16;
148 uint8_t *output2 = output2_ + out_stride * 16 + 16;
150 ACMRandom rnd(ACMRandom::DeterministicSeed());
152 av1_loop_restoration_precal();
154 for (i = 0; i < NUM_ITERS; ++i) {
155 for (j = -16; j < max_h + 16; ++j)
156 for (k = -16; k < max_w + 16; ++k)
157 input[j * stride + k] = rnd.Rand16() & 0xFF;
159 int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 -
160 SGRPROJ_PRJ_MIN0),
161 SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 -
162 SGRPROJ_PRJ_MIN1) };
163 int eps = rnd.PseudoUniform(1 << SGRPROJ_PARAMS_BITS);
165 // Test various tile sizes around 256x256
166 int test_w = max_w - (i / 9);
167 int test_h = max_h - (i % 9);
169 for (k = 0; k < test_h; k += pu_height)
170 for (j = 0; j < test_w; j += pu_width) {
171 int w = AOMMIN(pu_width, test_w - j);
172 int h = AOMMIN(pu_height, test_h - k);
173 uint8_t *input_p = input + k * stride + j;
174 uint8_t *output_p = output + k * out_stride + j;
175 uint8_t *output2_p = output2 + k * out_stride + j;
176 tst_fun_(input_p, w, h, stride, eps, xqd, output_p, out_stride,
177 tmpbuf, 8, 0);
178 apply_selfguided_restoration_c(input_p, w, h, stride, eps, xqd,
179 output2_p, out_stride, tmpbuf, 8, 0);
182 for (j = 0; j < test_h; ++j)
183 for (k = 0; k < test_w; ++k) {
184 ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]);
188 aom_free(input_);
189 aom_free(output_);
190 aom_free(output2_);
191 aom_free(tmpbuf);
194 private:
195 SgrFunc tst_fun_;
198 TEST_P(AV1SelfguidedFilterTest, DISABLED_SpeedTest) { RunSpeedTest(); }
199 TEST_P(AV1SelfguidedFilterTest, CorrectnessTest) { RunCorrectnessTest(); }
// Instantiate the low bit depth tests once per SIMD flavour that is enabled
// in this build.
#if HAVE_SSE4_1
INSTANTIATE_TEST_CASE_P(
    SSE4_1, AV1SelfguidedFilterTest,
    ::testing::Values(apply_selfguided_restoration_sse4_1));
#endif  // HAVE_SSE4_1

#if HAVE_AVX2
INSTANTIATE_TEST_CASE_P(
    AVX2, AV1SelfguidedFilterTest,
    ::testing::Values(apply_selfguided_restoration_avx2));
#endif  // HAVE_AVX2
211 // Test parameter list:
212 // <tst_fun_, bit_depth>
213 typedef tuple<SgrFunc, int> HighbdFilterTestParam;
215 class AV1HighbdSelfguidedFilterTest
216 : public ::testing::TestWithParam<HighbdFilterTestParam> {
217 public:
218 virtual ~AV1HighbdSelfguidedFilterTest() {}
219 virtual void SetUp() {}
221 virtual void TearDown() { libaom_test::ClearSystemState(); }
223 protected:
224 void RunSpeedTest() {
225 tst_fun_ = GET_PARAM(0);
226 const int pu_width = RESTORATION_PROC_UNIT_SIZE;
227 const int pu_height = RESTORATION_PROC_UNIT_SIZE;
228 const int width = 256, height = 256, stride = 288, out_stride = 288;
229 const int NUM_ITERS = 2000;
230 int i, j, k;
231 int bit_depth = GET_PARAM(1);
232 int mask = (1 << bit_depth) - 1;
234 uint16_t *input_ =
235 (uint16_t *)aom_memalign(32, stride * (height + 32) * sizeof(uint16_t));
236 uint16_t *output_ = (uint16_t *)aom_memalign(
237 32, out_stride * (height + 32) * sizeof(uint16_t));
238 int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE);
239 uint16_t *input = input_ + stride * 16 + 16;
240 uint16_t *output = output_ + out_stride * 16 + 16;
242 ACMRandom rnd(ACMRandom::DeterministicSeed());
244 for (i = -16; i < height + 16; ++i)
245 for (j = -16; j < width + 16; ++j)
246 input[i * stride + j] = rnd.Rand16() & mask;
248 int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 -
249 SGRPROJ_PRJ_MIN0),
250 SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 -
251 SGRPROJ_PRJ_MIN1) };
252 // Fix a parameter set, since the speed depends slightly on r.
253 // Change this to test different combinations of values of r.
254 int eps = 15;
256 av1_loop_restoration_precal();
258 aom_usec_timer ref_timer;
259 aom_usec_timer_start(&ref_timer);
260 for (i = 0; i < NUM_ITERS; ++i) {
261 for (k = 0; k < height; k += pu_height)
262 for (j = 0; j < width; j += pu_width) {
263 int w = AOMMIN(pu_width, width - j);
264 int h = AOMMIN(pu_height, height - k);
265 uint16_t *input_p = input + k * stride + j;
266 uint16_t *output_p = output + k * out_stride + j;
267 apply_selfguided_restoration_c(
268 CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd,
269 CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth, 1);
272 aom_usec_timer_mark(&ref_timer);
273 const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
275 aom_usec_timer tst_timer;
276 aom_usec_timer_start(&tst_timer);
277 for (i = 0; i < NUM_ITERS; ++i) {
278 for (k = 0; k < height; k += pu_height)
279 for (j = 0; j < width; j += pu_width) {
280 int w = AOMMIN(pu_width, width - j);
281 int h = AOMMIN(pu_height, height - k);
282 uint16_t *input_p = input + k * stride + j;
283 uint16_t *output_p = output + k * out_stride + j;
284 tst_fun_(CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd,
285 CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth,
289 aom_usec_timer_mark(&tst_timer);
290 const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
292 std::cout << "[ ] C time = " << ref_time / 1000
293 << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
295 EXPECT_GT(ref_time, tst_time)
296 << "Error: AV1HighbdSelfguidedFilterTest.SpeedTest, SIMD slower than "
297 "C.\n"
298 << "C time: " << ref_time << " us\n"
299 << "SIMD time: " << tst_time << " us\n";
301 aom_free(input_);
302 aom_free(output_);
303 aom_free(tmpbuf);
306 void RunCorrectnessTest() {
307 tst_fun_ = GET_PARAM(0);
308 const int pu_width = RESTORATION_PROC_UNIT_SIZE;
309 const int pu_height = RESTORATION_PROC_UNIT_SIZE;
310 // Set the maximum width/height to test here. We actually test a small
311 // range of sizes *up to* this size, so that we can check, eg.,
312 // the behaviour on tiles which are not a multiple of 4 wide.
313 const int max_w = 260, max_h = 260, stride = 672, out_stride = 672;
314 const int NUM_ITERS = 81;
315 int i, j, k;
316 int bit_depth = GET_PARAM(1);
317 int mask = (1 << bit_depth) - 1;
319 uint16_t *input_ =
320 (uint16_t *)aom_memalign(32, stride * (max_h + 32) * sizeof(uint16_t));
321 uint16_t *output_ = (uint16_t *)aom_memalign(
322 32, out_stride * (max_h + 32) * sizeof(uint16_t));
323 uint16_t *output2_ = (uint16_t *)aom_memalign(
324 32, out_stride * (max_h + 32) * sizeof(uint16_t));
325 int32_t *tmpbuf = (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE);
327 uint16_t *input = input_ + stride * 16 + 16;
328 uint16_t *output = output_ + out_stride * 16 + 16;
329 uint16_t *output2 = output2_ + out_stride * 16 + 16;
331 ACMRandom rnd(ACMRandom::DeterministicSeed());
333 av1_loop_restoration_precal();
335 for (i = 0; i < NUM_ITERS; ++i) {
336 for (j = -16; j < max_h + 16; ++j)
337 for (k = -16; k < max_w + 16; ++k)
338 input[j * stride + k] = rnd.Rand16() & mask;
340 int xqd[2] = { SGRPROJ_PRJ_MIN0 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 -
341 SGRPROJ_PRJ_MIN0),
342 SGRPROJ_PRJ_MIN1 + rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 -
343 SGRPROJ_PRJ_MIN1) };
344 int eps = rnd.PseudoUniform(1 << SGRPROJ_PARAMS_BITS);
346 // Test various tile sizes around 256x256
347 int test_w = max_w - (i / 9);
348 int test_h = max_h - (i % 9);
350 for (k = 0; k < test_h; k += pu_height)
351 for (j = 0; j < test_w; j += pu_width) {
352 int w = AOMMIN(pu_width, test_w - j);
353 int h = AOMMIN(pu_height, test_h - k);
354 uint16_t *input_p = input + k * stride + j;
355 uint16_t *output_p = output + k * out_stride + j;
356 uint16_t *output2_p = output2 + k * out_stride + j;
357 tst_fun_(CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd,
358 CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth,
360 apply_selfguided_restoration_c(
361 CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd,
362 CONVERT_TO_BYTEPTR(output2_p), out_stride, tmpbuf, bit_depth, 1);
365 for (j = 0; j < test_h; ++j)
366 for (k = 0; k < test_w; ++k)
367 ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]);
370 aom_free(input_);
371 aom_free(output_);
372 aom_free(output2_);
373 aom_free(tmpbuf);
376 private:
377 SgrFunc tst_fun_;
380 TEST_P(AV1HighbdSelfguidedFilterTest, DISABLED_SpeedTest) { RunSpeedTest(); }
381 TEST_P(AV1HighbdSelfguidedFilterTest, CorrectnessTest) { RunCorrectnessTest(); }
// Instantiate the high bit depth tests for each enabled SIMD flavour,
// combined with bit depths 8, 10 and 12.
#if HAVE_SSE4_1
const int highbd_params_sse4_1[] = { 8, 10, 12 };
INSTANTIATE_TEST_CASE_P(
    SSE4_1, AV1HighbdSelfguidedFilterTest,
    ::testing::Combine(
        ::testing::Values(apply_selfguided_restoration_sse4_1),
        ::testing::ValuesIn(highbd_params_sse4_1)));
#endif  // HAVE_SSE4_1

#if HAVE_AVX2
const int highbd_params_avx2[] = { 8, 10, 12 };
INSTANTIATE_TEST_CASE_P(
    AVX2, AV1HighbdSelfguidedFilterTest,
    ::testing::Combine(
        ::testing::Values(apply_selfguided_restoration_avx2),
        ::testing::ValuesIn(highbd_params_avx2)));
#endif  // HAVE_AVX2
399 } // namespace