2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
16 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
18 #include "config/aom_config.h"
19 #include "config/av1_rtcd.h"
21 #include "aom_ports/aom_timer.h"
22 #include "av1/common/cdef_block.h"
23 #include "test/acm_random.h"
24 #include "test/register_state_check.h"
25 #include "test/util.h"
27 using libaom_test::ACMRandom
;
31 typedef std::tuple
<cdef_filter_block_func
, cdef_filter_block_func
, BLOCK_SIZE
,
35 class CDEFBlockTest
: public ::testing::TestWithParam
<cdef_dir_param_t
> {
37 virtual ~CDEFBlockTest() {}
38 virtual void SetUp() {
40 ref_cdef
= GET_PARAM(1);
42 boundary
= GET_PARAM(3);
46 virtual void TearDown() {}
52 cdef_filter_block_func cdef
;
53 cdef_filter_block_func ref_cdef
;
55 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFBlockTest
);
57 typedef CDEFBlockTest CDEFBlockHighbdTest
;
58 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFBlockHighbdTest
);
60 typedef CDEFBlockTest CDEFSpeedTest
;
61 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFSpeedTest
);
63 void test_cdef(int bsize
, int iterations
, cdef_filter_block_func cdef
,
64 cdef_filter_block_func ref_cdef
, int boundary
, int depth
) {
66 const int ysize
= size
+ 2 * CDEF_VBORDER
;
67 ACMRandom
rnd(ACMRandom::DeterministicSeed());
68 DECLARE_ALIGNED(16, uint16_t, s
[ysize
* CDEF_BSTRIDE
]);
69 DECLARE_ALIGNED(16, static uint16_t, d
[size
* size
]);
70 DECLARE_ALIGNED(16, static uint16_t, ref_d
[size
* size
]);
71 memset(ref_d
, 0, sizeof(ref_d
));
72 memset(d
, 0, sizeof(d
));
74 int error
= 0, pristrength
= 0, secstrength
, dir
;
75 int pridamping
, secdamping
, bits
, level
, count
,
76 errdepth
= 0, errpristrength
= 0, errsecstrength
= 0, errboundary
= 0,
77 errpridamping
= 0, errsecdamping
= 0;
80 const unsigned int max_pos
= size
* size
>> static_cast<int>(depth
== 8);
81 for (pridamping
= 3 + depth
- 8; pridamping
< 7 - 3 * !!boundary
+ depth
- 8;
83 for (secdamping
= 3 + depth
- 8;
84 secdamping
< 7 - 3 * !!boundary
+ depth
- 8; secdamping
++) {
85 for (count
= 0; count
< iterations
; count
++) {
86 for (level
= 0; level
< (1 << depth
) && !error
;
87 level
+= (2 + 6 * !!boundary
) << (depth
- 8)) {
88 for (bits
= 1; bits
<= depth
&& !error
; bits
+= 1 + 3 * !!boundary
) {
89 for (unsigned int i
= 0; i
< sizeof(s
) / sizeof(*s
); i
++)
90 s
[i
] = clamp((rnd
.Rand16() & ((1 << bits
) - 1)) + level
, 0,
93 if (boundary
& 1) { // Left
94 for (int i
= 0; i
< ysize
; i
++)
95 for (int j
= 0; j
< CDEF_HBORDER
; j
++)
96 s
[i
* CDEF_BSTRIDE
+ j
] = CDEF_VERY_LARGE
;
98 if (boundary
& 2) { // Right
99 for (int i
= 0; i
< ysize
; i
++)
100 for (int j
= CDEF_HBORDER
+ size
; j
< CDEF_BSTRIDE
; j
++)
101 s
[i
* CDEF_BSTRIDE
+ j
] = CDEF_VERY_LARGE
;
103 if (boundary
& 4) { // Above
104 for (int i
= 0; i
< CDEF_VBORDER
; i
++)
105 for (int j
= 0; j
< CDEF_BSTRIDE
; j
++)
106 s
[i
* CDEF_BSTRIDE
+ j
] = CDEF_VERY_LARGE
;
108 if (boundary
& 8) { // Below
109 for (int i
= CDEF_VBORDER
+ size
; i
< ysize
; i
++)
110 for (int j
= 0; j
< CDEF_BSTRIDE
; j
++)
111 s
[i
* CDEF_BSTRIDE
+ j
] = CDEF_VERY_LARGE
;
114 for (dir
= 0; dir
< 8; dir
++) {
115 for (pristrength
= 0; pristrength
<= 19 << (depth
- 8) && !error
;
116 pristrength
+= (1 + 4 * !!boundary
) << (depth
- 8)) {
117 if (pristrength
== 16) pristrength
= 19;
118 for (secstrength
= 0; secstrength
<= 4 << (depth
- 8) && !error
;
119 secstrength
+= 1 << (depth
- 8)) {
120 if (secstrength
== 3 << (depth
- 8)) continue;
121 ref_cdef(ref_d
, size
,
122 s
+ CDEF_HBORDER
+ CDEF_VBORDER
* CDEF_BSTRIDE
,
123 pristrength
, secstrength
, dir
, pridamping
,
124 secdamping
, bsize
, depth
- 8);
125 // If cdef and ref_cdef are the same, we're just testing
127 if (cdef
!= ref_cdef
)
128 API_REGISTER_STATE_CHECK(cdef(
129 d
, size
, s
+ CDEF_HBORDER
+ CDEF_VBORDER
* CDEF_BSTRIDE
,
130 pristrength
, secstrength
, dir
, pridamping
, secdamping
,
132 if (ref_cdef
!= cdef
) {
133 for (pos
= 0; pos
< max_pos
&& !error
; pos
++) {
134 error
= ref_d
[pos
] != d
[pos
];
136 errpristrength
= pristrength
;
137 errsecstrength
= secstrength
;
138 errboundary
= boundary
;
139 errpridamping
= pridamping
;
140 errsecdamping
= secdamping
;
153 EXPECT_EQ(0, error
) << "Error: CDEFBlockTest, SIMD and C mismatch."
155 << "First error at " << pos
% size
<< "," << pos
/ size
156 << " (" << (int16_t)ref_d
[pos
] << " : " << (int16_t)d
[pos
]
158 << "pristrength: " << errpristrength
<< std::endl
159 << "pridamping: " << errpridamping
<< std::endl
160 << "secstrength: " << errsecstrength
<< std::endl
161 << "secdamping: " << errsecdamping
<< std::endl
162 << "depth: " << errdepth
<< std::endl
163 << "size: " << bsize
<< std::endl
164 << "boundary: " << errboundary
<< std::endl
168 void test_cdef_speed(int bsize
, int iterations
, cdef_filter_block_func cdef
,
169 cdef_filter_block_func ref_cdef
, int boundary
, int depth
) {
170 aom_usec_timer ref_timer
;
171 aom_usec_timer timer
;
173 aom_usec_timer_start(&ref_timer
);
174 test_cdef(bsize
, iterations
, ref_cdef
, ref_cdef
, boundary
, depth
);
175 aom_usec_timer_mark(&ref_timer
);
176 int ref_elapsed_time
= (int)aom_usec_timer_elapsed(&ref_timer
);
178 aom_usec_timer_start(&timer
);
179 test_cdef(bsize
, iterations
, cdef
, cdef
, boundary
, depth
);
180 aom_usec_timer_mark(&timer
);
181 int elapsed_time
= (int)aom_usec_timer_elapsed(&timer
);
183 EXPECT_GT(ref_elapsed_time
, elapsed_time
)
184 << "Error: CDEFSpeedTest, SIMD slower than C." << std::endl
185 << "C time: " << ref_elapsed_time
<< " us" << std::endl
186 << "SIMD time: " << elapsed_time
<< " us" << std::endl
;
189 typedef int (*find_dir_t
)(const uint16_t *img
, int stride
, int32_t *var
,
192 typedef std::tuple
<find_dir_t
, find_dir_t
> find_dir_param_t
;
194 class CDEFFindDirTest
: public ::testing::TestWithParam
<find_dir_param_t
> {
196 virtual ~CDEFFindDirTest() {}
197 virtual void SetUp() {
198 finddir
= GET_PARAM(0);
199 ref_finddir
= GET_PARAM(1);
202 virtual void TearDown() {}
206 find_dir_t ref_finddir
;
208 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFFindDirTest
);
210 typedef CDEFFindDirTest CDEFFindDirSpeedTest
;
211 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFFindDirSpeedTest
);
213 void test_finddir(int (*finddir
)(const uint16_t *img
, int stride
, int32_t *var
,
215 int (*ref_finddir
)(const uint16_t *img
, int stride
,
216 int32_t *var
, int coeff_shift
)) {
218 ACMRandom
rnd(ACMRandom::DeterministicSeed());
219 DECLARE_ALIGNED(16, uint16_t, s
[size
* size
]);
222 int depth
, bits
, level
, count
, errdepth
= 0;
223 int ref_res
= 0, res
= 0;
224 int32_t ref_var
= 0, var
= 0;
226 for (depth
= 8; depth
<= 12 && !error
; depth
+= 2) {
227 for (count
= 0; count
< 512 && !error
; count
++) {
228 for (level
= 0; level
< (1 << depth
) && !error
;
229 level
+= 1 << (depth
- 8)) {
230 for (bits
= 1; bits
<= depth
&& !error
; bits
++) {
231 for (unsigned int i
= 0; i
< sizeof(s
) / sizeof(*s
); i
++)
232 s
[i
] = clamp((rnd
.Rand16() & ((1 << bits
) - 1)) + level
, 0,
234 for (int c
= 0; c
< 1 + 9 * (finddir
== ref_finddir
); c
++)
235 ref_res
= ref_finddir(s
, size
, &ref_var
, depth
- 8);
236 if (finddir
!= ref_finddir
)
237 API_REGISTER_STATE_CHECK(res
= finddir(s
, size
, &var
, depth
- 8));
238 if (ref_finddir
!= finddir
) {
239 if (res
!= ref_res
|| var
!= ref_var
) error
= 1;
247 EXPECT_EQ(0, error
) << "Error: CDEFFindDirTest, SIMD and C mismatch."
249 << "return: " << res
<< " : " << ref_res
<< std::endl
250 << "var: " << var
<< " : " << ref_var
<< std::endl
251 << "depth: " << errdepth
<< std::endl
255 void test_finddir_speed(int (*finddir
)(const uint16_t *img
, int stride
,
256 int32_t *var
, int coeff_shift
),
257 int (*ref_finddir
)(const uint16_t *img
, int stride
,
258 int32_t *var
, int coeff_shift
)) {
259 aom_usec_timer ref_timer
;
260 aom_usec_timer timer
;
262 aom_usec_timer_start(&ref_timer
);
263 test_finddir(ref_finddir
, ref_finddir
);
264 aom_usec_timer_mark(&ref_timer
);
265 int ref_elapsed_time
= (int)aom_usec_timer_elapsed(&ref_timer
);
267 aom_usec_timer_start(&timer
);
268 test_finddir(finddir
, finddir
);
269 aom_usec_timer_mark(&timer
);
270 int elapsed_time
= (int)aom_usec_timer_elapsed(&timer
);
272 EXPECT_GT(ref_elapsed_time
, elapsed_time
)
273 << "Error: CDEFFindDirSpeedTest, SIMD slower than C." << std::endl
274 << "C time: " << ref_elapsed_time
<< " us" << std::endl
275 << "SIMD time: " << elapsed_time
<< " us" << std::endl
;
278 TEST_P(CDEFBlockTest
, TestSIMDNoMismatch
) {
279 test_cdef(bsize
, 1, cdef
, ref_cdef
, boundary
, depth
);
282 TEST_P(CDEFBlockHighbdTest
, TestSIMDHighbdNoMismatch
) {
283 test_cdef(bsize
, 1, cdef
, ref_cdef
, boundary
, depth
);
286 TEST_P(CDEFSpeedTest
, DISABLED_TestSpeed
) {
287 test_cdef_speed(bsize
, 4, cdef
, ref_cdef
, boundary
, depth
);
290 TEST_P(CDEFFindDirTest
, TestSIMDNoMismatch
) {
291 test_finddir(finddir
, ref_finddir
);
294 TEST_P(CDEFFindDirSpeedTest
, DISABLED_TestSpeed
) {
295 test_finddir_speed(finddir
, ref_finddir
);
298 using std::make_tuple
;
300 // VS compiling for 32 bit targets does not support vector types in
301 // structs as arguments, which makes the v256 type of the intrinsics
302 // hard to support, so optimizations for this target are disabled.
303 #if defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)
305 INSTANTIATE_TEST_SUITE_P(
307 ::testing::Combine(::testing::Values(&cdef_filter_block_sse2
),
308 ::testing::Values(&cdef_filter_block_c
),
309 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
311 ::testing::Range(0, 16), ::testing::Values(8)));
312 #if CONFIG_AV1_HIGHBITDEPTH
313 INSTANTIATE_TEST_SUITE_P(
314 SSE2
, CDEFBlockHighbdTest
,
315 ::testing::Combine(::testing::Values(&cdef_filter_block_highbd_sse2
),
316 ::testing::Values(&cdef_filter_block_highbd_c
),
317 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
319 ::testing::Range(0, 16), ::testing::Range(10, 13, 2)));
321 INSTANTIATE_TEST_SUITE_P(SSE2
, CDEFFindDirTest
,
322 ::testing::Values(make_tuple(&cdef_find_dir_sse2
,
326 INSTANTIATE_TEST_SUITE_P(
327 SSSE3
, CDEFBlockTest
,
328 ::testing::Combine(::testing::Values(&cdef_filter_block_ssse3
),
329 ::testing::Values(&cdef_filter_block_c
),
330 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
332 ::testing::Range(0, 16), ::testing::Values(8)));
333 #if CONFIG_AV1_HIGHBITDEPTH
334 INSTANTIATE_TEST_SUITE_P(
335 SSSE3
, CDEFBlockHighbdTest
,
336 ::testing::Combine(::testing::Values(&cdef_filter_block_highbd_ssse3
),
337 ::testing::Values(&cdef_filter_block_highbd_c
),
338 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
340 ::testing::Range(0, 16), ::testing::Range(10, 13, 2)));
342 INSTANTIATE_TEST_SUITE_P(SSSE3
, CDEFFindDirTest
,
343 ::testing::Values(make_tuple(&cdef_find_dir_ssse3
,
348 INSTANTIATE_TEST_SUITE_P(
349 SSE4_1
, CDEFBlockTest
,
350 ::testing::Combine(::testing::Values(&cdef_filter_block_sse4_1
),
351 ::testing::Values(&cdef_filter_block_c
),
352 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
354 ::testing::Range(0, 16), ::testing::Values(8)));
355 #if CONFIG_AV1_HIGHBITDEPTH
356 INSTANTIATE_TEST_SUITE_P(
357 SSE4_1
, CDEFBlockHighbdTest
,
358 ::testing::Combine(::testing::Values(&cdef_filter_block_highbd_sse4_1
),
359 ::testing::Values(&cdef_filter_block_highbd_c
),
360 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
362 ::testing::Range(0, 16), ::testing::Range(10, 13, 2)));
364 INSTANTIATE_TEST_SUITE_P(SSE4_1
, CDEFFindDirTest
,
365 ::testing::Values(make_tuple(&cdef_find_dir_sse4_1
,
370 INSTANTIATE_TEST_SUITE_P(
372 ::testing::Combine(::testing::Values(&cdef_filter_block_avx2
),
373 ::testing::Values(&cdef_filter_block_c
),
374 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
376 ::testing::Range(0, 16), ::testing::Values(8)));
377 #if CONFIG_AV1_HIGHBITDEPTH
378 INSTANTIATE_TEST_SUITE_P(
379 AVX2
, CDEFBlockHighbdTest
,
380 ::testing::Combine(::testing::Values(&cdef_filter_block_highbd_avx2
),
381 ::testing::Values(&cdef_filter_block_highbd_c
),
382 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
384 ::testing::Range(0, 16), ::testing::Range(10, 13, 2)));
386 INSTANTIATE_TEST_SUITE_P(AVX2
, CDEFFindDirTest
,
387 ::testing::Values(make_tuple(&cdef_find_dir_avx2
,
392 INSTANTIATE_TEST_SUITE_P(
394 ::testing::Combine(::testing::Values(&cdef_filter_block_neon
),
395 ::testing::Values(&cdef_filter_block_c
),
396 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
398 ::testing::Range(0, 16), ::testing::Values(8)));
399 #if CONFIG_AV1_HIGHBITDEPTH
400 INSTANTIATE_TEST_SUITE_P(
401 NEON
, CDEFBlockHighbdTest
,
402 ::testing::Combine(::testing::Values(&cdef_filter_block_highbd_neon
),
403 ::testing::Values(&cdef_filter_block_highbd_c
),
404 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
406 ::testing::Range(0, 16), ::testing::Range(10, 13, 2)));
408 INSTANTIATE_TEST_SUITE_P(NEON
, CDEFFindDirTest
,
409 ::testing::Values(make_tuple(&cdef_find_dir_neon
,
413 // Test speed for all supported architectures
415 INSTANTIATE_TEST_SUITE_P(
417 ::testing::Combine(::testing::Values(&cdef_filter_block_sse2
),
418 ::testing::Values(&cdef_filter_block_c
),
419 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
421 ::testing::Range(0, 16), ::testing::Values(8)));
422 INSTANTIATE_TEST_SUITE_P(SSE2
, CDEFFindDirSpeedTest
,
423 ::testing::Values(make_tuple(&cdef_find_dir_sse2
,
428 INSTANTIATE_TEST_SUITE_P(
429 SSSE3
, CDEFSpeedTest
,
430 ::testing::Combine(::testing::Values(&cdef_filter_block_ssse3
),
431 ::testing::Values(&cdef_filter_block_c
),
432 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
434 ::testing::Range(0, 16), ::testing::Values(8)));
435 INSTANTIATE_TEST_SUITE_P(SSSE3
, CDEFFindDirSpeedTest
,
436 ::testing::Values(make_tuple(&cdef_find_dir_ssse3
,
441 INSTANTIATE_TEST_SUITE_P(
442 SSE4_1
, CDEFSpeedTest
,
443 ::testing::Combine(::testing::Values(&cdef_filter_block_sse4_1
),
444 ::testing::Values(&cdef_filter_block_c
),
445 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
447 ::testing::Range(0, 16), ::testing::Values(8)));
448 INSTANTIATE_TEST_SUITE_P(SSE4_1
, CDEFFindDirSpeedTest
,
449 ::testing::Values(make_tuple(&cdef_find_dir_sse4_1
,
454 INSTANTIATE_TEST_SUITE_P(
456 ::testing::Combine(::testing::Values(&cdef_filter_block_avx2
),
457 ::testing::Values(&cdef_filter_block_c
),
458 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
460 ::testing::Range(0, 16), ::testing::Values(8)));
461 INSTANTIATE_TEST_SUITE_P(AVX2
, CDEFFindDirSpeedTest
,
462 ::testing::Values(make_tuple(&cdef_find_dir_avx2
,
467 INSTANTIATE_TEST_SUITE_P(
469 ::testing::Combine(::testing::Values(&cdef_filter_block_neon
),
470 ::testing::Values(&cdef_filter_block_c
),
471 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
473 ::testing::Range(0, 16), ::testing::Values(8)));
474 INSTANTIATE_TEST_SUITE_P(NEON
, CDEFFindDirSpeedTest
,
475 ::testing::Values(make_tuple(&cdef_find_dir_neon
,
479 #endif // defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)