2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
14 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
16 #include "config/av1_rtcd.h"
18 #include "test/acm_random.h"
19 #include "test/clear_system_state.h"
20 #include "test/register_state_check.h"
21 #include "test/util.h"
23 #include "aom_ports/aom_timer.h"
24 #include "av1/common/mv.h"
25 #include "av1/common/restoration.h"
29 using ::testing::make_tuple
;
30 using ::testing::tuple
;
31 using libaom_test::ACMRandom
;
// Pointer type for a self-guided restoration implementation under test.
// The tests in this file pass, in order: the source pointer, the width and
// height of the unit being processed, the source stride, the eps index, the
// two-element xqd array, the destination pointer and stride, a scratch
// buffer, the bit depth and a high-bit-depth flag.
typedef void (*SgrFunc)(
    const uint8_t *dat8,
    int width,
    int height,
    int stride,
    int eps,
    const int *xqd,
    uint8_t *dst8,
    int dst_stride,
    int32_t *tmpbuf,
    int bit_depth,
    int highbd);
37 // Test parameter list:
39 typedef tuple
<SgrFunc
> FilterTestParam
;
// Value-parameterised fixture for the self-guided filter tests that call the
// filter with bit_depth 8 and highbd 0. The test parameter is a
// FilterTestParam whose element 0 is the function under test.
// NOTE(review): access specifiers, the tst_fun_ member declaration and the
// closing brace of the class are not visible in this excerpt.
41 class AV1SelfguidedFilterTest
42 : public ::testing::TestWithParam
<FilterTestParam
> {
// Destructor and SetUp are intentionally empty.
44 virtual ~AV1SelfguidedFilterTest() {}
45 virtual void SetUp() {}
// Calls libaom_test::ClearSystemState() when each test is torn down.
47 virtual void TearDown() { libaom_test::ClearSystemState(); }
// Speed benchmark body: times NUM_ITERS passes of the C reference and of the
// implementation under test over the same 256x256 input and reports both.
// NOTE(review): the function signature line (presumably RunSpeedTest(), which
// the DISABLED_SpeedTest case calls), the declarations of i/j/k, input_ and
// eps, the tail of some calls and the closing braces are not visible in this
// excerpt - confirm against the full file.
51 tst_fun_
= GET_PARAM(0);
// The frame is walked in units of RESTORATION_PROC_UNIT_SIZE in each
// direction.
52 const int pu_width
= RESTORATION_PROC_UNIT_SIZE
;
53 const int pu_height
= RESTORATION_PROC_UNIT_SIZE
;
54 const int width
= 256, height
= 256, stride
= 288, out_stride
= 288;
55 const int NUM_ITERS
= 2000;
// Buffers hold height + 32 rows; the working pointers below are offset by
// 16 rows and 16 columns so that indices down to -16 stay inside the
// allocation. (32 is presumably the requested alignment - confirm against
// aom_memalign.)
59 (uint8_t *)aom_memalign(32, stride
* (height
+ 32) * sizeof(uint8_t));
60 uint8_t *output_
= (uint8_t *)aom_memalign(
61 32, out_stride
* (height
+ 32) * sizeof(uint8_t));
62 int32_t *tmpbuf
= (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE
);
63 uint8_t *input
= input_
+ stride
* 16 + 16;
64 uint8_t *output
= output_
+ out_stride
* 16 + 16;
// Generator is created from ACMRandom::DeterministicSeed().
66 ACMRandom
rnd(ACMRandom::DeterministicSeed());
// Fill the frame plus a 16-pixel border on every side with 8-bit values.
68 for (i
= -16; i
< height
+ 16; ++i
)
69 for (j
= -16; j
< width
+ 16; ++j
)
70 input
[i
* stride
+ j
] = rnd
.Rand16() & 0xFF;
// Two projection coefficients, each SGRPROJ_PRJ_MINn plus a PseudoUniform
// draw. NOTE(review): the end of each initializer expression is cut off in
// this excerpt.
72 int xqd
[2] = { SGRPROJ_PRJ_MIN0
+ rnd
.PseudoUniform(SGRPROJ_PRJ_MAX0
+ 1 -
74 SGRPROJ_PRJ_MIN1
+ rnd
.PseudoUniform(SGRPROJ_PRJ_MAX1
+ 1 -
76 // Fix a parameter set, since the speed depends slightly on r.
77 // Change this to test different combinations of values of r.
80 av1_loop_restoration_precal();
// --- Time the C reference implementation. ---
82 aom_usec_timer ref_timer
;
83 aom_usec_timer_start(&ref_timer
);
84 for (i
= 0; i
< NUM_ITERS
; ++i
) {
85 for (k
= 0; k
< height
; k
+= pu_height
)
86 for (j
= 0; j
< width
; j
+= pu_width
) {
// Clip the unit to the frame edge.
87 int w
= AOMMIN(pu_width
, width
- j
);
88 int h
= AOMMIN(pu_height
, height
- k
);
89 uint8_t *input_p
= input
+ k
* stride
+ j
;
90 uint8_t *output_p
= output
+ k
* out_stride
+ j
;
// Last two arguments are bit_depth = 8 and highbd = 0 (see SgrFunc).
91 apply_selfguided_restoration_c(input_p
, w
, h
, stride
, eps
, xqd
,
92 output_p
, out_stride
, tmpbuf
, 8, 0);
95 aom_usec_timer_mark(&ref_timer
);
96 const int64_t ref_time
= aom_usec_timer_elapsed(&ref_timer
);
// --- Time the implementation under test with the same loop structure. ---
98 aom_usec_timer tst_timer
;
99 aom_usec_timer_start(&tst_timer
);
100 for (i
= 0; i
< NUM_ITERS
; ++i
) {
101 for (k
= 0; k
< height
; k
+= pu_height
)
102 for (j
= 0; j
< width
; j
+= pu_width
) {
103 int w
= AOMMIN(pu_width
, width
- j
);
104 int h
= AOMMIN(pu_height
, height
- k
);
105 uint8_t *input_p
= input
+ k
* stride
+ j
;
106 uint8_t *output_p
= output
+ k
* out_stride
+ j
;
// NOTE(review): the trailing arguments of this call are not visible here;
// presumably they mirror the reference call above - confirm.
107 tst_fun_(input_p
, w
, h
, stride
, eps
, xqd
, output_p
, out_stride
,
111 aom_usec_timer_mark(&tst_timer
);
112 const int64_t tst_time
= aom_usec_timer_elapsed(&tst_timer
);
// Report both times divided by 1000 (labelled "ms"; the failure message below
// prints the raw values labelled "us").
114 std::cout
<< "[ ] C time = " << ref_time
/ 1000
115 << " ms, SIMD time = " << tst_time
/ 1000 << " ms\n";
// Non-fatal check that the reference took strictly longer than the
// implementation under test.
117 EXPECT_GT(ref_time
, tst_time
)
118 << "Error: AV1SelfguidedFilterTest.SpeedTest, SIMD slower than C.\n"
119 << "C time: " << ref_time
<< " us\n"
120 << "SIMD time: " << tst_time
<< " us\n";
// Checks that the implementation under test produces exactly the same output
// as apply_selfguided_restoration_c on random 8-bit input, for 81 frame sizes
// (widths and heights from 252 to 260).
// NOTE(review): the declarations of i/j/k and input_, the tail of some calls,
// the closing braces and any buffer cleanup are not visible in this excerpt.
127 void RunCorrectnessTest() {
128 tst_fun_
= GET_PARAM(0);
129 const int pu_width
= RESTORATION_PROC_UNIT_SIZE
;
130 const int pu_height
= RESTORATION_PROC_UNIT_SIZE
;
131 // Set the maximum width/height to test here. We actually test a small
132 // range of sizes *up to* this size, so that we can check, eg.,
133 // the behaviour on tiles which are not a multiple of 4 wide.
134 const int max_w
= 260, max_h
= 260, stride
= 672, out_stride
= 672;
// 81 = 9 x 9: one iteration per (width, height) pair derived from i below.
135 const int NUM_ITERS
= 81;
// Buffers hold max_h + 32 rows; the working pointers are offset by 16 rows
// and 16 columns so that indices down to -16 stay inside the allocation.
139 (uint8_t *)aom_memalign(32, stride
* (max_h
+ 32) * sizeof(uint8_t));
140 uint8_t *output_
= (uint8_t *)aom_memalign(
141 32, out_stride
* (max_h
+ 32) * sizeof(uint8_t));
142 uint8_t *output2_
= (uint8_t *)aom_memalign(
143 32, out_stride
* (max_h
+ 32) * sizeof(uint8_t));
144 int32_t *tmpbuf
= (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE
);
146 uint8_t *input
= input_
+ stride
* 16 + 16;
// output receives the result of tst_fun_, output2 the C reference result.
147 uint8_t *output
= output_
+ out_stride
* 16 + 16;
148 uint8_t *output2
= output2_
+ out_stride
* 16 + 16;
150 ACMRandom
rnd(ACMRandom::DeterministicSeed());
152 av1_loop_restoration_precal();
154 for (i
= 0; i
< NUM_ITERS
; ++i
) {
// Fresh random 8-bit input (including the 16-pixel border) every iteration.
155 for (j
= -16; j
< max_h
+ 16; ++j
)
156 for (k
= -16; k
< max_w
+ 16; ++k
)
157 input
[j
* stride
+ k
] = rnd
.Rand16() & 0xFF;
// NOTE(review): the end of each xqd initializer expression is cut off in
// this excerpt.
159 int xqd
[2] = { SGRPROJ_PRJ_MIN0
+ rnd
.PseudoUniform(SGRPROJ_PRJ_MAX0
+ 1 -
161 SGRPROJ_PRJ_MIN1
+ rnd
.PseudoUniform(SGRPROJ_PRJ_MAX1
+ 1 -
// Random eps index; presumably in [0, 1 << SGRPROJ_PARAMS_BITS) - confirm
// against ACMRandom::PseudoUniform.
163 int eps
= rnd
.PseudoUniform(1 << SGRPROJ_PARAMS_BITS
);
165 // Test various tile sizes around 256x256
// i / 9 and i % 9 each take the values 0..8 over the 81 iterations.
166 int test_w
= max_w
- (i
/ 9);
167 int test_h
= max_h
- (i
% 9);
169 for (k
= 0; k
< test_h
; k
+= pu_height
)
170 for (j
= 0; j
< test_w
; j
+= pu_width
) {
// Clip the unit to the edge of the frame size under test.
171 int w
= AOMMIN(pu_width
, test_w
- j
);
172 int h
= AOMMIN(pu_height
, test_h
- k
);
173 uint8_t *input_p
= input
+ k
* stride
+ j
;
174 uint8_t *output_p
= output
+ k
* out_stride
+ j
;
175 uint8_t *output2_p
= output2
+ k
* out_stride
+ j
;
// NOTE(review): the trailing arguments of the tst_fun_ call are not visible
// here; presumably they mirror the reference call - confirm.
176 tst_fun_(input_p
, w
, h
, stride
, eps
, xqd
, output_p
, out_stride
,
178 apply_selfguided_restoration_c(input_p
, w
, h
, stride
, eps
, xqd
,
179 output2_p
, out_stride
, tmpbuf
, 8, 0);
// Compare every pixel of the two outputs.
// NOTE(review): ASSERT_EQ returns from this function on the first mismatch,
// so any cleanup placed after this loop would be skipped on failure.
182 for (j
= 0; j
< test_h
; ++j
)
183 for (k
= 0; k
< test_w
; ++k
) {
184 ASSERT_EQ(output
[j
* out_stride
+ k
], output2
[j
* out_stride
+ k
]);
198 TEST_P(AV1SelfguidedFilterTest
, DISABLED_SpeedTest
) { RunSpeedTest(); }
199 TEST_P(AV1SelfguidedFilterTest
, CorrectnessTest
) { RunCorrectnessTest(); }
202 INSTANTIATE_TEST_CASE_P(SSE4_1
, AV1SelfguidedFilterTest
,
203 ::testing::Values(apply_selfguided_restoration_sse4_1
));
207 INSTANTIATE_TEST_CASE_P(AVX2
, AV1SelfguidedFilterTest
,
208 ::testing::Values(apply_selfguided_restoration_avx2
));
211 // Test parameter list:
212 // <tst_fun_, bit_depth>
213 typedef tuple
<SgrFunc
, int> HighbdFilterTestParam
;
// Value-parameterised fixture for the high bit-depth self-guided filter
// tests. The test parameter is a HighbdFilterTestParam: element 0 is the
// function under test and element 1 is the bit depth.
// NOTE(review): access specifiers, the tst_fun_ member declaration and the
// closing brace of the class are not visible in this excerpt.
215 class AV1HighbdSelfguidedFilterTest
216 : public ::testing::TestWithParam
<HighbdFilterTestParam
> {
// Destructor and SetUp are intentionally empty.
218 virtual ~AV1HighbdSelfguidedFilterTest() {}
219 virtual void SetUp() {}
// Calls libaom_test::ClearSystemState() when each test is torn down.
221 virtual void TearDown() { libaom_test::ClearSystemState(); }
// Speed benchmark for the high bit-depth path: times NUM_ITERS passes of the
// C reference and of the implementation under test over the same 256x256
// 16-bit input and reports both.
// NOTE(review): the declarations of i/j/k, input_ and eps, the tail of some
// calls and the closing braces are not visible in this excerpt - confirm
// against the full file.
224 void RunSpeedTest() {
225 tst_fun_
= GET_PARAM(0);
226 const int pu_width
= RESTORATION_PROC_UNIT_SIZE
;
227 const int pu_height
= RESTORATION_PROC_UNIT_SIZE
;
228 const int width
= 256, height
= 256, stride
= 288, out_stride
= 288;
229 const int NUM_ITERS
= 2000;
// Bit depth comes from the test parameter; mask keeps the low bit_depth bits
// of each random sample.
231 int bit_depth
= GET_PARAM(1);
232 int mask
= (1 << bit_depth
) - 1;
// 16-bit buffers with height + 32 rows; the working pointers are offset by
// 16 rows and 16 columns so that indices down to -16 stay inside the
// allocation.
235 (uint16_t *)aom_memalign(32, stride
* (height
+ 32) * sizeof(uint16_t));
236 uint16_t *output_
= (uint16_t *)aom_memalign(
237 32, out_stride
* (height
+ 32) * sizeof(uint16_t));
238 int32_t *tmpbuf
= (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE
);
239 uint16_t *input
= input_
+ stride
* 16 + 16;
240 uint16_t *output
= output_
+ out_stride
* 16 + 16;
242 ACMRandom
rnd(ACMRandom::DeterministicSeed());
// Fill the frame plus a 16-pixel border on every side with masked samples.
244 for (i
= -16; i
< height
+ 16; ++i
)
245 for (j
= -16; j
< width
+ 16; ++j
)
246 input
[i
* stride
+ j
] = rnd
.Rand16() & mask
;
// NOTE(review): the end of each xqd initializer expression is cut off in
// this excerpt.
248 int xqd
[2] = { SGRPROJ_PRJ_MIN0
+ rnd
.PseudoUniform(SGRPROJ_PRJ_MAX0
+ 1 -
250 SGRPROJ_PRJ_MIN1
+ rnd
.PseudoUniform(SGRPROJ_PRJ_MAX1
+ 1 -
252 // Fix a parameter set, since the speed depends slightly on r.
253 // Change this to test different combinations of values of r.
256 av1_loop_restoration_precal();
// --- Time the C reference implementation. ---
258 aom_usec_timer ref_timer
;
259 aom_usec_timer_start(&ref_timer
);
260 for (i
= 0; i
< NUM_ITERS
; ++i
) {
261 for (k
= 0; k
< height
; k
+= pu_height
)
262 for (j
= 0; j
< width
; j
+= pu_width
) {
// Clip the unit to the frame edge.
263 int w
= AOMMIN(pu_width
, width
- j
);
264 int h
= AOMMIN(pu_height
, height
- k
);
265 uint16_t *input_p
= input
+ k
* stride
+ j
;
266 uint16_t *output_p
= output
+ k
* out_stride
+ j
;
// The 16-bit pointers are passed through CONVERT_TO_BYTEPTR; the final
// argument 1 is the highbd flag (see SgrFunc).
267 apply_selfguided_restoration_c(
268 CONVERT_TO_BYTEPTR(input_p
), w
, h
, stride
, eps
, xqd
,
269 CONVERT_TO_BYTEPTR(output_p
), out_stride
, tmpbuf
, bit_depth
, 1);
272 aom_usec_timer_mark(&ref_timer
);
273 const int64_t ref_time
= aom_usec_timer_elapsed(&ref_timer
);
// --- Time the implementation under test with the same loop structure. ---
275 aom_usec_timer tst_timer
;
276 aom_usec_timer_start(&tst_timer
);
277 for (i
= 0; i
< NUM_ITERS
; ++i
) {
278 for (k
= 0; k
< height
; k
+= pu_height
)
279 for (j
= 0; j
< width
; j
+= pu_width
) {
280 int w
= AOMMIN(pu_width
, width
- j
);
281 int h
= AOMMIN(pu_height
, height
- k
);
282 uint16_t *input_p
= input
+ k
* stride
+ j
;
283 uint16_t *output_p
= output
+ k
* out_stride
+ j
;
// NOTE(review): the final argument of this call is not visible here;
// presumably it mirrors the reference call above - confirm.
284 tst_fun_(CONVERT_TO_BYTEPTR(input_p
), w
, h
, stride
, eps
, xqd
,
285 CONVERT_TO_BYTEPTR(output_p
), out_stride
, tmpbuf
, bit_depth
,
289 aom_usec_timer_mark(&tst_timer
);
290 const int64_t tst_time
= aom_usec_timer_elapsed(&tst_timer
);
// Report both times divided by 1000 (labelled "ms"; the failure message below
// prints the raw values labelled "us").
292 std::cout
<< "[ ] C time = " << ref_time
/ 1000
293 << " ms, SIMD time = " << tst_time
/ 1000 << " ms\n";
// Non-fatal check that the reference took strictly longer than the
// implementation under test.
// NOTE(review): the continuation of the first message string is not visible
// in this excerpt.
295 EXPECT_GT(ref_time
, tst_time
)
296 << "Error: AV1HighbdSelfguidedFilterTest.SpeedTest, SIMD slower than "
298 << "C time: " << ref_time
<< " us\n"
299 << "SIMD time: " << tst_time
<< " us\n";
// Checks that the implementation under test produces exactly the same output
// as apply_selfguided_restoration_c on random high bit-depth input, for 81
// frame sizes (widths and heights from 252 to 260) at the parameterised bit
// depth.
// NOTE(review): the declarations of i/j/k and input_, the tail of some calls,
// the closing braces and any buffer cleanup are not visible in this excerpt.
306 void RunCorrectnessTest() {
307 tst_fun_
= GET_PARAM(0);
308 const int pu_width
= RESTORATION_PROC_UNIT_SIZE
;
309 const int pu_height
= RESTORATION_PROC_UNIT_SIZE
;
310 // Set the maximum width/height to test here. We actually test a small
311 // range of sizes *up to* this size, so that we can check, eg.,
312 // the behaviour on tiles which are not a multiple of 4 wide.
313 const int max_w
= 260, max_h
= 260, stride
= 672, out_stride
= 672;
// 81 = 9 x 9: one iteration per (width, height) pair derived from i below.
314 const int NUM_ITERS
= 81;
// Bit depth comes from the test parameter; mask keeps the low bit_depth bits
// of each random sample.
316 int bit_depth
= GET_PARAM(1);
317 int mask
= (1 << bit_depth
) - 1;
// 16-bit buffers with max_h + 32 rows; the working pointers are offset by
// 16 rows and 16 columns so that indices down to -16 stay inside the
// allocation.
320 (uint16_t *)aom_memalign(32, stride
* (max_h
+ 32) * sizeof(uint16_t));
321 uint16_t *output_
= (uint16_t *)aom_memalign(
322 32, out_stride
* (max_h
+ 32) * sizeof(uint16_t));
323 uint16_t *output2_
= (uint16_t *)aom_memalign(
324 32, out_stride
* (max_h
+ 32) * sizeof(uint16_t));
325 int32_t *tmpbuf
= (int32_t *)aom_memalign(32, RESTORATION_TMPBUF_SIZE
);
327 uint16_t *input
= input_
+ stride
* 16 + 16;
// output receives the result of tst_fun_, output2 the C reference result.
328 uint16_t *output
= output_
+ out_stride
* 16 + 16;
329 uint16_t *output2
= output2_
+ out_stride
* 16 + 16;
331 ACMRandom
rnd(ACMRandom::DeterministicSeed());
333 av1_loop_restoration_precal();
335 for (i
= 0; i
< NUM_ITERS
; ++i
) {
// Fresh masked random input (including the 16-pixel border) every iteration.
336 for (j
= -16; j
< max_h
+ 16; ++j
)
337 for (k
= -16; k
< max_w
+ 16; ++k
)
338 input
[j
* stride
+ k
] = rnd
.Rand16() & mask
;
// NOTE(review): the end of each xqd initializer expression is cut off in
// this excerpt.
340 int xqd
[2] = { SGRPROJ_PRJ_MIN0
+ rnd
.PseudoUniform(SGRPROJ_PRJ_MAX0
+ 1 -
342 SGRPROJ_PRJ_MIN1
+ rnd
.PseudoUniform(SGRPROJ_PRJ_MAX1
+ 1 -
// Random eps index; presumably in [0, 1 << SGRPROJ_PARAMS_BITS) - confirm
// against ACMRandom::PseudoUniform.
344 int eps
= rnd
.PseudoUniform(1 << SGRPROJ_PARAMS_BITS
);
346 // Test various tile sizes around 256x256
// i / 9 and i % 9 each take the values 0..8 over the 81 iterations.
347 int test_w
= max_w
- (i
/ 9);
348 int test_h
= max_h
- (i
% 9);
350 for (k
= 0; k
< test_h
; k
+= pu_height
)
351 for (j
= 0; j
< test_w
; j
+= pu_width
) {
// Clip the unit to the edge of the frame size under test.
352 int w
= AOMMIN(pu_width
, test_w
- j
);
353 int h
= AOMMIN(pu_height
, test_h
- k
);
354 uint16_t *input_p
= input
+ k
* stride
+ j
;
355 uint16_t *output_p
= output
+ k
* out_stride
+ j
;
356 uint16_t *output2_p
= output2
+ k
* out_stride
+ j
;
// NOTE(review): the final argument of the tst_fun_ call is not visible here;
// presumably it mirrors the reference call (highbd = 1) - confirm.
357 tst_fun_(CONVERT_TO_BYTEPTR(input_p
), w
, h
, stride
, eps
, xqd
,
358 CONVERT_TO_BYTEPTR(output_p
), out_stride
, tmpbuf
, bit_depth
,
360 apply_selfguided_restoration_c(
361 CONVERT_TO_BYTEPTR(input_p
), w
, h
, stride
, eps
, xqd
,
362 CONVERT_TO_BYTEPTR(output2_p
), out_stride
, tmpbuf
, bit_depth
, 1);
// Compare every sample of the two outputs.
// NOTE(review): ASSERT_EQ returns from this function on the first mismatch,
// so any cleanup placed after this loop would be skipped on failure.
365 for (j
= 0; j
< test_h
; ++j
)
366 for (k
= 0; k
< test_w
; ++k
)
367 ASSERT_EQ(output
[j
* out_stride
+ k
], output2
[j
* out_stride
+ k
]);
380 TEST_P(AV1HighbdSelfguidedFilterTest
, DISABLED_SpeedTest
) { RunSpeedTest(); }
381 TEST_P(AV1HighbdSelfguidedFilterTest
, CorrectnessTest
) { RunCorrectnessTest(); }
384 const int highbd_params_sse4_1
[] = { 8, 10, 12 };
385 INSTANTIATE_TEST_CASE_P(
386 SSE4_1
, AV1HighbdSelfguidedFilterTest
,
387 ::testing::Combine(::testing::Values(apply_selfguided_restoration_sse4_1
),
388 ::testing::ValuesIn(highbd_params_sse4_1
)));
392 const int highbd_params_avx2
[] = { 8, 10, 12 };
393 INSTANTIATE_TEST_CASE_P(
394 AVX2
, AV1HighbdSelfguidedFilterTest
,
395 ::testing::Combine(::testing::Values(apply_selfguided_restoration_avx2
),
396 ::testing::ValuesIn(highbd_params_avx2
)));