av1_convolve_ x,y _avx2() -- use 256 bit load/store
[aom.git] / test / selfguided_filter_test.cc
blob093f9dca5b601b9d115b64b23926b6495b9a0f16
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
12 #include <ctime>
14 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
16 #include "./av1_rtcd.h"
17 #include "test/acm_random.h"
18 #include "test/clear_system_state.h"
19 #include "test/register_state_check.h"
20 #include "test/util.h"
22 #include "aom_ports/aom_timer.h"
23 #include "av1/common/mv.h"
24 #include "av1/common/restoration.h"
26 namespace {
28 using std::tr1::tuple;
29 using std::tr1::make_tuple;
30 using libaom_test::ACMRandom;
32 typedef tuple<> FilterTestParam;
34 class AV1SelfguidedFilterTest
35 : public ::testing::TestWithParam<FilterTestParam> {
36 public:
37 virtual ~AV1SelfguidedFilterTest() {}
38 virtual void SetUp() {}
40 virtual void TearDown() { libaom_test::ClearSystemState(); }
42 protected:
43 void RunSpeedTest() {
44 const int pu_width = RESTORATION_PROC_UNIT_SIZE;
45 const int pu_height = RESTORATION_PROC_UNIT_SIZE;
46 const int width = 256, height = 256, stride = 288, out_stride = 288;
47 const int NUM_ITERS = 2000;
48 int i, j, k;
50 uint8_t *input_ =
51 (uint8_t *)aom_memalign(16, stride * (height + 32) * sizeof(uint8_t));
52 uint8_t *output_ = (uint8_t *)aom_memalign(
53 16, out_stride * (height + 32) * sizeof(uint8_t));
54 int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE);
55 uint8_t *input = input_ + stride * 16 + 16;
56 uint8_t *output = output_ + out_stride * 16 + 16;
58 ACMRandom rnd(ACMRandom::DeterministicSeed());
60 for (i = -16; i < height + 16; ++i)
61 for (j = -16; j < width + 16; ++j)
62 input[i * stride + j] = rnd.Rand16() & 0xFF;
64 int xqd[2] = {
65 SGRPROJ_PRJ_MIN0 +
66 rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0),
67 SGRPROJ_PRJ_MIN1 +
68 rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1)
70 // Fix a parameter set, since the speed depends slightly on r.
71 // Change this to test different combinations of values of r.
72 int eps = 15;
74 av1_loop_restoration_precal();
76 std::clock_t start = std::clock();
77 for (i = 0; i < NUM_ITERS; ++i) {
78 for (k = 0; k < height; k += pu_height)
79 for (j = 0; j < width; j += pu_width) {
80 int w = AOMMIN(pu_width, width - j);
81 int h = AOMMIN(pu_height, height - k);
82 uint8_t *input_p = input + k * stride + j;
83 uint8_t *output_p = output + k * out_stride + j;
84 apply_selfguided_restoration(input_p, w, h, stride, eps, xqd,
85 output_p, out_stride, tmpbuf, 8, 0);
88 std::clock_t end = std::clock();
89 double elapsed = ((end - start) / (double)CLOCKS_PER_SEC);
91 printf("%5d %dx%d blocks in %7.3fs = %7.3fus/block\n", NUM_ITERS, width,
92 height, elapsed, elapsed * 1000000. / NUM_ITERS);
94 aom_free(input_);
95 aom_free(output_);
96 aom_free(tmpbuf);
99 void RunCorrectnessTest() {
100 const int pu_width = RESTORATION_PROC_UNIT_SIZE;
101 const int pu_height = RESTORATION_PROC_UNIT_SIZE;
102 // Set the maximum width/height to test here. We actually test a small
103 // range of sizes *up to* this size, so that we can check, eg.,
104 // the behaviour on tiles which are not a multiple of 4 wide.
105 const int max_w = 260, max_h = 260, stride = 672, out_stride = 672;
106 const int NUM_ITERS = 81;
107 int i, j, k;
109 uint8_t *input_ =
110 (uint8_t *)aom_memalign(16, stride * (max_h + 32) * sizeof(uint8_t));
111 uint8_t *output_ = (uint8_t *)aom_memalign(
112 16, out_stride * (max_h + 32) * sizeof(uint8_t));
113 uint8_t *output2_ = (uint8_t *)aom_memalign(
114 16, out_stride * (max_h + 32) * sizeof(uint8_t));
115 int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE);
117 uint8_t *input = input_ + stride * 16 + 16;
118 uint8_t *output = output_ + out_stride * 16 + 16;
119 uint8_t *output2 = output2_ + out_stride * 16 + 16;
121 ACMRandom rnd(ACMRandom::DeterministicSeed());
123 av1_loop_restoration_precal();
125 for (i = 0; i < NUM_ITERS; ++i) {
126 for (j = -16; j < max_h + 16; ++j)
127 for (k = -16; k < max_w + 16; ++k)
128 input[j * stride + k] = rnd.Rand16() & 0xFF;
130 int xqd[2] = {
131 SGRPROJ_PRJ_MIN0 +
132 rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0),
133 SGRPROJ_PRJ_MIN1 +
134 rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1)
136 int eps = rnd.PseudoUniform(1 << SGRPROJ_PARAMS_BITS);
138 // Test various tile sizes around 256x256
139 int test_w = max_w - (i / 9);
140 int test_h = max_h - (i % 9);
142 for (k = 0; k < test_h; k += pu_height)
143 for (j = 0; j < test_w; j += pu_width) {
144 int w = AOMMIN(pu_width, test_w - j);
145 int h = AOMMIN(pu_height, test_h - k);
146 uint8_t *input_p = input + k * stride + j;
147 uint8_t *output_p = output + k * out_stride + j;
148 uint8_t *output2_p = output2 + k * out_stride + j;
149 apply_selfguided_restoration(input_p, w, h, stride, eps, xqd,
150 output_p, out_stride, tmpbuf, 8, 0);
151 apply_selfguided_restoration_c(input_p, w, h, stride, eps, xqd,
152 output2_p, out_stride, tmpbuf, 8, 0);
155 apply_selfguided_restoration(input, test_w, test_h, stride, eps, xqd,
156 output, out_stride, tmpbuf);
157 apply_selfguided_restoration_c(input, test_w, test_h, stride, eps, xqd,
158 output2, out_stride, tmpbuf);
160 for (j = 0; j < test_h; ++j)
161 for (k = 0; k < test_w; ++k) {
162 ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]);
166 aom_free(input_);
167 aom_free(output_);
168 aom_free(output2_);
169 aom_free(tmpbuf);
173 TEST_P(AV1SelfguidedFilterTest, SpeedTest) { RunSpeedTest(); }
174 TEST_P(AV1SelfguidedFilterTest, CorrectnessTest) { RunCorrectnessTest(); }
176 #if HAVE_SSE4_1
177 const FilterTestParam params[] = { make_tuple() };
178 INSTANTIATE_TEST_CASE_P(SSE4_1, AV1SelfguidedFilterTest,
179 ::testing::ValuesIn(params));
180 #endif
182 typedef tuple<int> HighbdFilterTestParam;
184 class AV1HighbdSelfguidedFilterTest
185 : public ::testing::TestWithParam<HighbdFilterTestParam> {
186 public:
187 virtual ~AV1HighbdSelfguidedFilterTest() {}
188 virtual void SetUp() {}
190 virtual void TearDown() { libaom_test::ClearSystemState(); }
192 protected:
193 void RunSpeedTest() {
194 const int pu_width = RESTORATION_PROC_UNIT_SIZE;
195 const int pu_height = RESTORATION_PROC_UNIT_SIZE;
196 const int width = 256, height = 256, stride = 288, out_stride = 288;
197 const int NUM_ITERS = 2000;
198 int i, j, k;
199 int bit_depth = GET_PARAM(0);
200 int mask = (1 << bit_depth) - 1;
202 uint16_t *input_ =
203 (uint16_t *)aom_memalign(16, stride * (height + 32) * sizeof(uint16_t));
204 uint16_t *output_ = (uint16_t *)aom_memalign(
205 16, out_stride * (height + 32) * sizeof(uint16_t));
206 int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE);
207 uint16_t *input = input_ + stride * 16 + 16;
208 uint16_t *output = output_ + out_stride * 16 + 16;
210 ACMRandom rnd(ACMRandom::DeterministicSeed());
212 for (i = -16; i < height + 16; ++i)
213 for (j = -16; j < width + 16; ++j)
214 input[i * stride + j] = rnd.Rand16() & mask;
216 int xqd[2] = {
217 SGRPROJ_PRJ_MIN0 +
218 rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0),
219 SGRPROJ_PRJ_MIN1 +
220 rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1)
222 // Fix a parameter set, since the speed depends slightly on r.
223 // Change this to test different combinations of values of r.
224 int eps = 15;
226 av1_loop_restoration_precal();
228 aom_usec_timer timer;
229 aom_usec_timer_start(&timer);
230 for (i = 0; i < NUM_ITERS; ++i) {
231 for (k = 0; k < height; k += pu_height)
232 for (j = 0; j < width; j += pu_width) {
233 int w = AOMMIN(pu_width, width - j);
234 int h = AOMMIN(pu_height, height - k);
235 uint16_t *input_p = input + k * stride + j;
236 uint16_t *output_p = output + k * out_stride + j;
237 apply_selfguided_restoration(
238 CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd,
239 CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth, 1);
242 aom_usec_timer_mark(&timer);
243 double elapsed = static_cast<double>(aom_usec_timer_elapsed(&timer));
245 printf("%5d %dx%d blocks in %7.3fs = %7.3fus/block\n", NUM_ITERS, width,
246 height, elapsed / 1000000, elapsed / NUM_ITERS);
248 aom_free(input_);
249 aom_free(output_);
250 aom_free(tmpbuf);
253 void RunCorrectnessTest() {
254 const int pu_width = RESTORATION_PROC_UNIT_SIZE;
255 const int pu_height = RESTORATION_PROC_UNIT_SIZE;
256 // Set the maximum width/height to test here. We actually test a small
257 // range of sizes *up to* this size, so that we can check, eg.,
258 // the behaviour on tiles which are not a multiple of 4 wide.
259 const int max_w = 260, max_h = 260, stride = 672, out_stride = 672;
260 const int NUM_ITERS = 81;
261 int i, j, k;
262 int bit_depth = GET_PARAM(0);
263 int mask = (1 << bit_depth) - 1;
265 uint16_t *input_ =
266 (uint16_t *)aom_memalign(16, stride * (max_h + 32) * sizeof(uint16_t));
267 uint16_t *output_ = (uint16_t *)aom_memalign(
268 16, out_stride * (max_h + 32) * sizeof(uint16_t));
269 uint16_t *output2_ = (uint16_t *)aom_memalign(
270 16, out_stride * (max_h + 32) * sizeof(uint16_t));
271 int32_t *tmpbuf = (int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE);
273 uint16_t *input = input_ + stride * 16 + 16;
274 uint16_t *output = output_ + out_stride * 16 + 16;
275 uint16_t *output2 = output2_ + out_stride * 16 + 16;
277 ACMRandom rnd(ACMRandom::DeterministicSeed());
279 av1_loop_restoration_precal();
281 for (i = 0; i < NUM_ITERS; ++i) {
282 for (j = -16; j < max_h + 16; ++j)
283 for (k = -16; k < max_w + 16; ++k)
284 input[j * stride + k] = rnd.Rand16() & mask;
286 int xqd[2] = {
287 SGRPROJ_PRJ_MIN0 +
288 rnd.PseudoUniform(SGRPROJ_PRJ_MAX0 + 1 - SGRPROJ_PRJ_MIN0),
289 SGRPROJ_PRJ_MIN1 +
290 rnd.PseudoUniform(SGRPROJ_PRJ_MAX1 + 1 - SGRPROJ_PRJ_MIN1)
292 int eps = rnd.PseudoUniform(1 << SGRPROJ_PARAMS_BITS);
294 // Test various tile sizes around 256x256
295 int test_w = max_w - (i / 9);
296 int test_h = max_h - (i % 9);
298 for (k = 0; k < test_h; k += pu_height)
299 for (j = 0; j < test_w; j += pu_width) {
300 int w = AOMMIN(pu_width, test_w - j);
301 int h = AOMMIN(pu_height, test_h - k);
302 uint16_t *input_p = input + k * stride + j;
303 uint16_t *output_p = output + k * out_stride + j;
304 uint16_t *output2_p = output2 + k * out_stride + j;
305 apply_selfguided_restoration(
306 CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd,
307 CONVERT_TO_BYTEPTR(output_p), out_stride, tmpbuf, bit_depth, 1);
308 apply_selfguided_restoration_c(
309 CONVERT_TO_BYTEPTR(input_p), w, h, stride, eps, xqd,
310 CONVERT_TO_BYTEPTR(output2_p), out_stride, tmpbuf, bit_depth, 1);
314 apply_selfguided_restoration_highbd(input, test_w, test_h, stride,
315 bit_depth, eps, xqd, output,
316 out_stride, tmpbuf);
317 apply_selfguided_restoration_highbd_c(input, test_w, test_h, stride,
318 bit_depth, eps, xqd, output2,
319 out_stride, tmpbuf);
321 for (j = 0; j < test_h; ++j)
322 for (k = 0; k < test_w; ++k)
323 ASSERT_EQ(output[j * out_stride + k], output2[j * out_stride + k]);
326 aom_free(input_);
327 aom_free(output_);
328 aom_free(output2_);
329 aom_free(tmpbuf);
333 TEST_P(AV1HighbdSelfguidedFilterTest, SpeedTest) { RunSpeedTest(); }
334 TEST_P(AV1HighbdSelfguidedFilterTest, CorrectnessTest) { RunCorrectnessTest(); }
336 #if HAVE_SSE4_1
337 const HighbdFilterTestParam highbd_params[] = { make_tuple(8), make_tuple(10),
338 make_tuple(12) };
339 INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdSelfguidedFilterTest,
340 ::testing::ValuesIn(highbd_params));
341 #endif
343 } // namespace