/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
18 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
20 #include "config/aom_config.h"
21 #include "config/av1_rtcd.h"
23 #include "aom_ports/aom_timer.h"
24 #include "av1/common/cdef_block.h"
25 #include "test/acm_random.h"
26 #include "test/register_state_check.h"
27 #include "test/util.h"
29 using libaom_test::ACMRandom
;
33 using CdefFilterBlockFunctions
= std::array
<cdef_filter_block_func
, 4>;
35 typedef std::tuple
<CdefFilterBlockFunctions
, CdefFilterBlockFunctions
,
39 class CDEFBlockTest
: public ::testing::TestWithParam
<cdef_dir_param_t
> {
41 ~CDEFBlockTest() override
= default;
42 void SetUp() override
{
44 ref_cdef
= GET_PARAM(1);
46 boundary
= GET_PARAM(3);
54 CdefFilterBlockFunctions cdef
;
55 CdefFilterBlockFunctions ref_cdef
;
57 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFBlockTest
);
59 typedef CDEFBlockTest CDEFBlockHighbdTest
;
60 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFBlockHighbdTest
);
62 typedef CDEFBlockTest CDEFSpeedTest
;
63 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFSpeedTest
);
65 typedef CDEFBlockTest CDEFSpeedHighbdTest
;
66 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFSpeedHighbdTest
);
68 int64_t test_cdef(BLOCK_SIZE bsize
, int iterations
,
69 CdefFilterBlockFunctions cdef
,
70 CdefFilterBlockFunctions ref_cdef
, int boundary
, int depth
) {
71 aom_usec_timer ref_timer
;
72 int64_t ref_elapsed_time
= 0;
74 const int ysize
= size
+ 2 * CDEF_VBORDER
;
75 ACMRandom
rnd(ACMRandom::DeterministicSeed());
76 DECLARE_ALIGNED(16, uint16_t, s
[ysize
* CDEF_BSTRIDE
]);
77 DECLARE_ALIGNED(16, static uint16_t, d
[size
* size
]);
78 DECLARE_ALIGNED(16, static uint16_t, ref_d
[size
* size
]);
79 memset(ref_d
, 0, sizeof(ref_d
));
80 memset(d
, 0, sizeof(d
));
82 int error
= 0, pristrength
= 0, secstrength
, dir
;
83 int pridamping
, secdamping
, bits
, level
, count
,
84 errdepth
= 0, errpristrength
= 0, errsecstrength
= 0, errboundary
= 0,
85 errpridamping
= 0, errsecdamping
= 0;
88 const int block_width
=
89 ((bsize
== BLOCK_8X8
) || (bsize
== BLOCK_8X4
)) ? 8 : 4;
90 const int block_height
=
91 ((bsize
== BLOCK_8X8
) || (bsize
== BLOCK_4X8
)) ? 8 : 4;
92 const unsigned int max_pos
= size
* size
>> static_cast<int>(depth
== 8);
93 for (pridamping
= 3 + depth
- 8; pridamping
< 7 - 3 * !!boundary
+ depth
- 8;
95 for (secdamping
= 3 + depth
- 8;
96 secdamping
< 7 - 3 * !!boundary
+ depth
- 8; secdamping
++) {
97 for (count
= 0; count
< iterations
; count
++) {
98 for (level
= 0; level
< (1 << depth
) && !error
;
99 level
+= (2 + 6 * !!boundary
) << (depth
- 8)) {
100 for (bits
= 1; bits
<= depth
&& !error
; bits
+= 1 + 3 * !!boundary
) {
101 for (unsigned int i
= 0; i
< sizeof(s
) / sizeof(*s
); i
++)
102 s
[i
] = clamp((rnd
.Rand16() & ((1 << bits
) - 1)) + level
, 0,
105 if (boundary
& 1) { // Left
106 for (int i
= 0; i
< ysize
; i
++)
107 for (int j
= 0; j
< CDEF_HBORDER
; j
++)
108 s
[i
* CDEF_BSTRIDE
+ j
] = CDEF_VERY_LARGE
;
110 if (boundary
& 2) { // Right
111 for (int i
= 0; i
< ysize
; i
++)
112 for (int j
= CDEF_HBORDER
+ size
; j
< CDEF_BSTRIDE
; j
++)
113 s
[i
* CDEF_BSTRIDE
+ j
] = CDEF_VERY_LARGE
;
115 if (boundary
& 4) { // Above
116 for (int i
= 0; i
< CDEF_VBORDER
; i
++)
117 for (int j
= 0; j
< CDEF_BSTRIDE
; j
++)
118 s
[i
* CDEF_BSTRIDE
+ j
] = CDEF_VERY_LARGE
;
120 if (boundary
& 8) { // Below
121 for (int i
= CDEF_VBORDER
+ size
; i
< ysize
; i
++)
122 for (int j
= 0; j
< CDEF_BSTRIDE
; j
++)
123 s
[i
* CDEF_BSTRIDE
+ j
] = CDEF_VERY_LARGE
;
126 for (dir
= 0; dir
< 8; dir
++) {
127 for (pristrength
= 0; pristrength
<= 19 << (depth
- 8) && !error
;
128 pristrength
+= (1 + 4 * !!boundary
) << (depth
- 8)) {
129 if (pristrength
== 16) pristrength
= 19;
130 for (secstrength
= 0; secstrength
<= 4 << (depth
- 8) && !error
;
131 secstrength
+= 1 << (depth
- 8)) {
132 if (secstrength
== 3 << (depth
- 8)) continue;
134 const int strength_index
=
135 (secstrength
== 0) | ((pristrength
== 0) << 1);
137 aom_usec_timer_start(&ref_timer
);
138 ref_cdef
[strength_index
](
140 s
+ CDEF_HBORDER
+ CDEF_VBORDER
* CDEF_BSTRIDE
,
141 pristrength
, secstrength
, dir
, pridamping
, secdamping
,
142 depth
- 8, block_width
, block_height
);
143 aom_usec_timer_mark(&ref_timer
);
144 ref_elapsed_time
+= aom_usec_timer_elapsed(&ref_timer
);
145 // If cdef and ref_cdef are the same, we're just testing
147 if (cdef
[0] != ref_cdef
[0])
148 API_REGISTER_STATE_CHECK(cdef
[strength_index
](
149 d
, size
, s
+ CDEF_HBORDER
+ CDEF_VBORDER
* CDEF_BSTRIDE
,
150 pristrength
, secstrength
, dir
, pridamping
, secdamping
,
151 depth
- 8, block_width
, block_height
));
152 if (ref_cdef
[0] != cdef
[0]) {
153 for (pos
= 0; pos
< max_pos
&& !error
; pos
++) {
154 error
= ref_d
[pos
] != d
[pos
];
156 errpristrength
= pristrength
;
157 errsecstrength
= secstrength
;
158 errboundary
= boundary
;
159 errpridamping
= pridamping
;
160 errsecdamping
= secdamping
;
173 EXPECT_EQ(0, error
) << "Error: CDEFBlockTest, SIMD and C mismatch."
175 << "First error at " << pos
% size
<< "," << pos
/ size
176 << " (" << (int16_t)ref_d
[pos
] << " : " << (int16_t)d
[pos
]
178 << "pristrength: " << errpristrength
<< std::endl
179 << "pridamping: " << errpridamping
<< std::endl
180 << "secstrength: " << errsecstrength
<< std::endl
181 << "secdamping: " << errsecdamping
<< std::endl
182 << "depth: " << errdepth
<< std::endl
183 << "size: " << bsize
<< std::endl
184 << "boundary: " << errboundary
<< std::endl
187 return ref_elapsed_time
;
190 void test_cdef_speed(BLOCK_SIZE bsize
, int iterations
,
191 CdefFilterBlockFunctions cdef
,
192 CdefFilterBlockFunctions ref_cdef
, int boundary
,
194 int64_t ref_elapsed_time
=
195 test_cdef(bsize
, iterations
, ref_cdef
, ref_cdef
, boundary
, depth
);
197 int64_t elapsed_time
=
198 test_cdef(bsize
, iterations
, cdef
, cdef
, boundary
, depth
);
200 std::cout
<< "C time: " << ref_elapsed_time
<< " us" << std::endl
201 << "SIMD time: " << elapsed_time
<< " us" << std::endl
;
203 EXPECT_GT(ref_elapsed_time
, elapsed_time
)
204 << "Error: CDEFSpeedTest, SIMD slower than C." << std::endl
205 << "C time: " << ref_elapsed_time
<< " us" << std::endl
206 << "SIMD time: " << elapsed_time
<< " us" << std::endl
;
209 typedef int (*find_dir_t
)(const uint16_t *img
, int stride
, int32_t *var
,
212 typedef std::tuple
<find_dir_t
, find_dir_t
> find_dir_param_t
;
214 class CDEFFindDirTest
: public ::testing::TestWithParam
<find_dir_param_t
> {
216 ~CDEFFindDirTest() override
= default;
217 void SetUp() override
{
218 finddir
= GET_PARAM(0);
219 ref_finddir
= GET_PARAM(1);
224 find_dir_t ref_finddir
;
226 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFFindDirTest
);
228 typedef CDEFFindDirTest CDEFFindDirSpeedTest
;
229 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFFindDirSpeedTest
);
231 void test_finddir(int (*finddir
)(const uint16_t *img
, int stride
, int32_t *var
,
233 int (*ref_finddir
)(const uint16_t *img
, int stride
,
234 int32_t *var
, int coeff_shift
)) {
236 ACMRandom
rnd(ACMRandom::DeterministicSeed());
237 DECLARE_ALIGNED(16, uint16_t, s
[size
* size
]);
240 int depth
, bits
, level
, count
, errdepth
= 0;
241 int ref_res
= 0, res
= 0;
242 int32_t ref_var
= 0, var
= 0;
244 for (depth
= 8; depth
<= 12 && !error
; depth
+= 2) {
245 for (count
= 0; count
< 512 && !error
; count
++) {
246 for (level
= 0; level
< (1 << depth
) && !error
;
247 level
+= 1 << (depth
- 8)) {
248 for (bits
= 1; bits
<= depth
&& !error
; bits
++) {
249 for (unsigned int i
= 0; i
< sizeof(s
) / sizeof(*s
); i
++)
250 s
[i
] = clamp((rnd
.Rand16() & ((1 << bits
) - 1)) + level
, 0,
252 for (int c
= 0; c
< 1 + 9 * (finddir
== ref_finddir
); c
++)
253 ref_res
= ref_finddir(s
, size
, &ref_var
, depth
- 8);
254 if (finddir
!= ref_finddir
)
255 API_REGISTER_STATE_CHECK(res
= finddir(s
, size
, &var
, depth
- 8));
256 if (ref_finddir
!= finddir
) {
257 if (res
!= ref_res
|| var
!= ref_var
) error
= 1;
265 EXPECT_EQ(0, error
) << "Error: CDEFFindDirTest, SIMD and C mismatch."
267 << "return: " << res
<< " : " << ref_res
<< std::endl
268 << "var: " << var
<< " : " << ref_var
<< std::endl
269 << "depth: " << errdepth
<< std::endl
273 void test_finddir_speed(int (*finddir
)(const uint16_t *img
, int stride
,
274 int32_t *var
, int coeff_shift
),
275 int (*ref_finddir
)(const uint16_t *img
, int stride
,
276 int32_t *var
, int coeff_shift
)) {
277 aom_usec_timer ref_timer
;
278 aom_usec_timer timer
;
280 aom_usec_timer_start(&ref_timer
);
281 test_finddir(ref_finddir
, ref_finddir
);
282 aom_usec_timer_mark(&ref_timer
);
283 int64_t ref_elapsed_time
= aom_usec_timer_elapsed(&ref_timer
);
285 aom_usec_timer_start(&timer
);
286 test_finddir(finddir
, finddir
);
287 aom_usec_timer_mark(&timer
);
288 int64_t elapsed_time
= aom_usec_timer_elapsed(&timer
);
290 EXPECT_GT(ref_elapsed_time
, elapsed_time
)
291 << "Error: CDEFFindDirSpeedTest, SIMD slower than C." << std::endl
292 << "C time: " << ref_elapsed_time
<< " us" << std::endl
293 << "SIMD time: " << elapsed_time
<< " us" << std::endl
;
// Signature of a dual direction search: it takes two image pointers sharing
// one stride and writes one variance and one direction per image through the
// var1/var2 and out1/out2 pointers.
using find_dir_dual_t = void (*)(const uint16_t *img1, const uint16_t *img2,
                                 int stride, int32_t *var1, int32_t *var2,
                                 int coeff_shift, int *out1, int *out2);
300 typedef std::tuple
<find_dir_dual_t
, find_dir_dual_t
> find_dir_dual_param_t
;
302 class CDEFFindDirDualTest
303 : public ::testing::TestWithParam
<find_dir_dual_param_t
> {
305 ~CDEFFindDirDualTest() override
= default;
306 void SetUp() override
{
307 finddir
= GET_PARAM(0);
308 ref_finddir
= GET_PARAM(1);
312 find_dir_dual_t finddir
;
313 find_dir_dual_t ref_finddir
;
315 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFFindDirDualTest
);
317 typedef CDEFFindDirDualTest CDEFFindDirDualSpeedTest
;
318 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFFindDirDualSpeedTest
);
320 void test_finddir_dual(
321 void (*finddir
)(const uint16_t *img1
, const uint16_t *img2
, int stride
,
322 int32_t *var1
, int32_t *var2
, int coeff_shift
, int *out1
,
324 void (*ref_finddir
)(const uint16_t *img1
, const uint16_t *img2
, int stride
,
325 int32_t *var1
, int32_t *var2
, int coeff_shift
,
326 int *out1
, int *out2
)) {
327 const int size_wd
= 16;
328 const int size_ht
= 8;
329 ACMRandom
rnd(ACMRandom::DeterministicSeed());
330 DECLARE_ALIGNED(16, uint16_t, s
[size_ht
* size_wd
]);
332 int error
= 0, errdepth
= 0;
333 int32_t ref_var
[2] = { 0 };
334 int ref_dir
[2] = { 0 };
335 int32_t var
[2] = { 0 };
338 for (int depth
= 8; depth
<= 12 && !error
; depth
+= 2) {
339 for (int count
= 0; count
< 512 && !error
; count
++) {
340 for (int level
= 0; level
< (1 << depth
) && !error
;
341 level
+= 1 << (depth
- 8)) {
342 for (int bits
= 1; bits
<= depth
&& !error
; bits
++) {
343 for (unsigned int i
= 0; i
< sizeof(s
) / sizeof(*s
); i
++)
344 s
[i
] = clamp((rnd
.Rand16() & ((1 << bits
) - 1)) + level
, 0,
346 for (int c
= 0; c
< 1 + 9 * (finddir
== ref_finddir
); c
++)
347 ref_finddir(s
, s
+ 8, size_wd
, &ref_var
[0], &ref_var
[1], depth
- 8,
348 &ref_dir
[0], &ref_dir
[1]);
349 if (finddir
!= ref_finddir
)
350 API_REGISTER_STATE_CHECK(finddir(s
, s
+ 8, size_wd
, &var
[0],
351 &var
[1], depth
- 8, &dir
[0],
353 if (ref_finddir
!= finddir
) {
354 for (int j
= 0; j
< 2; j
++) {
355 if (ref_dir
[j
] != dir
[j
] || ref_var
[j
] != var
[j
]) error
= 1;
364 for (int j
= 0; j
< 2; j
++) {
365 EXPECT_EQ(0, error
) << "Error: CDEFFindDirTest, SIMD and C mismatch."
367 << "direction: " << dir
[j
] << " : " << ref_dir
[j
]
369 << "variance: " << var
[j
] << " : " << ref_var
[j
]
371 << "depth: " << errdepth
<< std::endl
376 void test_finddir_dual_speed(
377 void (*finddir
)(const uint16_t *img1
, const uint16_t *img2
, int stride
,
378 int32_t *var1
, int32_t *var2
, int coeff_shift
, int *out1
,
380 void (*ref_finddir
)(const uint16_t *img1
, const uint16_t *img2
, int stride
,
381 int32_t *var1
, int32_t *var2
, int coeff_shift
,
382 int *out1
, int *out2
)) {
383 aom_usec_timer ref_timer
;
384 aom_usec_timer timer
;
386 aom_usec_timer_start(&ref_timer
);
387 test_finddir_dual(ref_finddir
, ref_finddir
);
388 aom_usec_timer_mark(&ref_timer
);
389 const double ref_elapsed_time
=
390 static_cast<double>(aom_usec_timer_elapsed(&ref_timer
));
392 aom_usec_timer_start(&timer
);
393 test_finddir_dual(finddir
, finddir
);
394 aom_usec_timer_mark(&timer
);
395 const double elapsed_time
=
396 static_cast<double>(aom_usec_timer_elapsed(&timer
));
399 "ref_time=%lf \t simd_time=%lf \t "
401 ref_elapsed_time
, elapsed_time
, ref_elapsed_time
/ elapsed_time
);
// Side length of the square buffers used by the copy-rect tests; it is also
// used as the row stride of those buffers.
#define MAX_CDEF_BLOCK 256

// Upper bound on the number of width/height combinations each copy-rect test
// tries.
constexpr int kIterations = 100;
408 using CDEFCopyRect8To16
= void (*)(uint16_t *dst
, int dstride
,
409 const uint8_t *src
, int sstride
, int width
,
412 using CDEFCopyRect8To16Param
= std::tuple
<CDEFCopyRect8To16
, CDEFCopyRect8To16
>;
414 class CDEFCopyRect8to16Test
415 : public ::testing::TestWithParam
<CDEFCopyRect8To16Param
> {
417 CDEFCopyRect8to16Test()
418 : rnd_(libaom_test::ACMRandom::DeterministicSeed()),
419 test_func_(GET_PARAM(0)), ref_func_(GET_PARAM(1)) {}
420 ~CDEFCopyRect8to16Test() override
= default;
421 void SetUp() override
{
422 src_
= reinterpret_cast<uint8_t *>(
423 aom_memalign(8, sizeof(uint8_t) * MAX_CDEF_BLOCK
* MAX_CDEF_BLOCK
));
424 ASSERT_NE(src_
, nullptr);
425 ref_dst_
= reinterpret_cast<uint16_t *>(
426 aom_memalign(16, sizeof(uint16_t) * MAX_CDEF_BLOCK
* MAX_CDEF_BLOCK
));
427 ASSERT_NE(ref_dst_
, nullptr);
428 test_dst_
= reinterpret_cast<uint16_t *>(
429 aom_memalign(16, sizeof(uint16_t) * MAX_CDEF_BLOCK
* MAX_CDEF_BLOCK
));
430 ASSERT_NE(test_dst_
, nullptr);
433 void TearDown() override
{
439 void test_copy_rect_8_to_16(CDEFCopyRect8To16 test_func
,
440 CDEFCopyRect8To16 ref_func
) {
441 constexpr int stride
= MAX_CDEF_BLOCK
;
443 for (int k
= 0; k
< kIterations
&& !error
; k
++) {
444 // This function operates on values of width that are either 4 or a
445 // multiple of 8. The first iteration uses a fixed 4x4 block; after that the
446 // width is a random multiple of 8 up to 256 and the height is a random even
// value between 2 and 256.
447 const int width
= k
== 0 ? 4 : (rnd_
.Rand8() % 32 + 1) * 8;
448 const int height
= k
== 0 ? 4 : (rnd_
.Rand8() % 128 + 1) * 2;
449 for (int i
= 0; i
< height
; i
++) {
450 for (int j
= 0; j
< width
; j
++) {
451 src_
[i
* stride
+ j
] = rnd_
.Rand8();
455 ref_func(ref_dst_
, stride
, src_
, stride
, width
, height
);
456 test_func(test_dst_
, stride
, src_
, stride
, width
, height
);
459 for (i
= 0; i
< height
; i
++) {
460 for (j
= 0; j
< width
; j
++) {
461 if (test_dst_
[i
* stride
+ j
] != ref_dst_
[i
* stride
+ j
]) {
471 << "Error: CDEFCopyRect8to16Test, SIMD and C mismatch." << std::endl
472 << "First error at " << i
<< "," << j
<< " ("
473 << ref_dst_
[i
* stride
+ j
] << " : " << test_dst_
[i
* stride
+ j
]
475 << "width: " << width
<< std::endl
476 << "height: " << height
<< std::endl
482 libaom_test::ACMRandom rnd_
;
486 CDEFCopyRect8To16 test_func_
;
487 CDEFCopyRect8To16 ref_func_
;
489 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFCopyRect8to16Test
);
491 using CDEFCopyRect16To16
= void (*)(uint16_t *dst
, int dstride
,
492 const uint16_t *src
, int sstride
, int width
,
495 using CDEFCopyRect16To16Param
=
496 std::tuple
<CDEFCopyRect16To16
, CDEFCopyRect16To16
>;
498 class CDEFCopyRect16to16Test
499 : public ::testing::TestWithParam
<CDEFCopyRect16To16Param
> {
501 CDEFCopyRect16to16Test()
502 : rnd_(libaom_test::ACMRandom::DeterministicSeed()),
503 test_func_(GET_PARAM(0)), ref_func_(GET_PARAM(1)) {}
504 ~CDEFCopyRect16to16Test() override
= default;
505 void SetUp() override
{
506 src_
= reinterpret_cast<uint16_t *>(
507 aom_memalign(16, sizeof(uint16_t) * MAX_CDEF_BLOCK
* MAX_CDEF_BLOCK
));
508 ASSERT_NE(src_
, nullptr);
509 ref_dst_
= reinterpret_cast<uint16_t *>(
510 aom_memalign(16, sizeof(uint16_t) * MAX_CDEF_BLOCK
* MAX_CDEF_BLOCK
));
511 ASSERT_NE(ref_dst_
, nullptr);
512 test_dst_
= reinterpret_cast<uint16_t *>(
513 aom_memalign(16, sizeof(uint16_t) * MAX_CDEF_BLOCK
* MAX_CDEF_BLOCK
));
514 ASSERT_NE(test_dst_
, nullptr);
517 void TearDown() override
{
523 void test_copy_rect_16_to_16(CDEFCopyRect16To16 test_func
,
524 CDEFCopyRect16To16 ref_func
) {
525 constexpr int stride
= MAX_CDEF_BLOCK
;
527 for (int k
= 0; k
< kIterations
&& !error
; k
++) {
528 // This function operates on values of width that are either 4 or a
529 // multiple of 8. The first iteration uses a fixed 4x4 block; after that the
530 // width is a random multiple of 8 up to 256 and the height is a random even
// value between 2 and 256.
531 const int width
= k
== 0 ? 4 : (rnd_
.Rand8() % 32 + 1) * 8;
532 const int height
= k
== 0 ? 4 : (rnd_
.Rand8() % 128 + 1) * 2;
533 for (int i
= 0; i
< height
; i
++) {
534 for (int j
= 0; j
< width
; j
++) {
535 src_
[i
* stride
+ j
] = rnd_
.Rand16();
539 ref_func(ref_dst_
, stride
, src_
, stride
, width
, height
);
540 test_func(test_dst_
, stride
, src_
, stride
, width
, height
);
543 for (i
= 0; i
< height
; i
++) {
544 for (j
= 0; j
< width
; j
++) {
545 if (test_dst_
[i
* stride
+ j
] != ref_dst_
[i
* stride
+ j
]) {
555 << "Error: CDEFCopyRect16to16Test, SIMD and C mismatch." << std::endl
556 << "First error at " << i
<< "," << j
<< " ("
557 << ref_dst_
[i
* stride
+ j
] << " : " << test_dst_
[i
* stride
+ j
]
559 << "width: " << width
<< std::endl
560 << "height: " << height
<< std::endl
566 libaom_test::ACMRandom rnd_
;
570 CDEFCopyRect16To16 test_func_
;
571 CDEFCopyRect16To16 ref_func_
;
573 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFCopyRect16to16Test
);
575 TEST_P(CDEFBlockTest
, TestSIMDNoMismatch
) {
576 test_cdef(bsize
, 1, cdef
, ref_cdef
, boundary
, depth
);
579 TEST_P(CDEFBlockHighbdTest
, TestSIMDHighbdNoMismatch
) {
580 test_cdef(bsize
, 1, cdef
, ref_cdef
, boundary
, depth
);
583 TEST_P(CDEFSpeedTest
, DISABLED_TestSpeed
) {
584 test_cdef_speed(bsize
, 4, cdef
, ref_cdef
, boundary
, depth
);
587 TEST_P(CDEFSpeedHighbdTest
, DISABLED_TestSpeed
) {
588 test_cdef_speed(bsize
, 4, cdef
, ref_cdef
, boundary
, depth
);
591 TEST_P(CDEFFindDirTest
, TestSIMDNoMismatch
) {
592 test_finddir(finddir
, ref_finddir
);
595 TEST_P(CDEFFindDirSpeedTest
, DISABLED_TestSpeed
) {
596 test_finddir_speed(finddir
, ref_finddir
);
599 TEST_P(CDEFFindDirDualTest
, TestSIMDNoMismatch
) {
600 test_finddir_dual(finddir
, ref_finddir
);
603 TEST_P(CDEFFindDirDualSpeedTest
, DISABLED_TestSpeed
) {
604 test_finddir_dual_speed(finddir
, ref_finddir
);
607 TEST_P(CDEFCopyRect8to16Test
, TestSIMDNoMismatch
) {
608 test_copy_rect_8_to_16(test_func_
, ref_func_
);
611 TEST_P(CDEFCopyRect16to16Test
, TestSIMDNoMismatch
) {
612 test_copy_rect_16_to_16(test_func_
, ref_func_
);
615 using std::make_tuple
;
617 #if ((AOM_ARCH_X86 && HAVE_SSSE3) || HAVE_SSE4_1 || HAVE_AVX2 || HAVE_NEON)
618 static const CdefFilterBlockFunctions kCdefFilterFuncC
[] = {
619 { &cdef_filter_8_0_c
, &cdef_filter_8_1_c
, &cdef_filter_8_2_c
,
623 static const CdefFilterBlockFunctions kCdefFilterHighbdFuncC
[] = {
624 { &cdef_filter_16_0_c
, &cdef_filter_16_0_c
, &cdef_filter_16_0_c
,
625 &cdef_filter_16_0_c
}
629 #if AOM_ARCH_X86 && HAVE_SSSE3
630 static const CdefFilterBlockFunctions kCdefFilterFuncSsse3
[] = {
631 { &cdef_filter_8_0_ssse3
, &cdef_filter_8_1_ssse3
, &cdef_filter_8_2_ssse3
,
632 &cdef_filter_8_3_ssse3
}
635 static const CdefFilterBlockFunctions kCdefFilterHighbdFuncSsse3
[] = {
636 { &cdef_filter_16_0_ssse3
, &cdef_filter_16_1_ssse3
, &cdef_filter_16_2_ssse3
,
637 &cdef_filter_16_3_ssse3
}
640 INSTANTIATE_TEST_SUITE_P(
641 SSSE3
, CDEFBlockTest
,
642 ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncSsse3
),
643 ::testing::ValuesIn(kCdefFilterFuncC
),
644 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
646 ::testing::Range(0, 16), ::testing::Values(8)));
647 INSTANTIATE_TEST_SUITE_P(
648 SSSE3
, CDEFBlockHighbdTest
,
649 ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncSsse3
),
650 ::testing::ValuesIn(kCdefFilterHighbdFuncC
),
651 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
653 ::testing::Range(0, 16), ::testing::Range(10, 13, 2)));
654 INSTANTIATE_TEST_SUITE_P(SSSE3
, CDEFFindDirTest
,
655 ::testing::Values(make_tuple(&cdef_find_dir_ssse3
,
657 INSTANTIATE_TEST_SUITE_P(SSSE3
, CDEFFindDirDualTest
,
658 ::testing::Values(make_tuple(&cdef_find_dir_dual_ssse3
,
659 &cdef_find_dir_dual_c
)));
661 INSTANTIATE_TEST_SUITE_P(
662 SSSE3
, CDEFCopyRect8to16Test
,
663 ::testing::Values(make_tuple(&cdef_copy_rect8_8bit_to_16bit_c
,
664 &cdef_copy_rect8_8bit_to_16bit_ssse3
)));
666 INSTANTIATE_TEST_SUITE_P(
667 SSSE3
, CDEFCopyRect16to16Test
,
668 ::testing::Values(make_tuple(&cdef_copy_rect8_16bit_to_16bit_c
,
669 &cdef_copy_rect8_16bit_to_16bit_ssse3
)));
673 static const CdefFilterBlockFunctions kCdefFilterFuncSse4_1
[] = {
674 { &cdef_filter_8_0_sse4_1
, &cdef_filter_8_1_sse4_1
, &cdef_filter_8_2_sse4_1
,
675 &cdef_filter_8_3_sse4_1
}
678 static const CdefFilterBlockFunctions kCdefFilterHighbdFuncSse4_1
[] = {
679 { &cdef_filter_16_0_sse4_1
, &cdef_filter_16_1_sse4_1
,
680 &cdef_filter_16_2_sse4_1
, &cdef_filter_16_3_sse4_1
}
683 INSTANTIATE_TEST_SUITE_P(
684 SSE4_1
, CDEFBlockTest
,
685 ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncSse4_1
),
686 ::testing::ValuesIn(kCdefFilterFuncC
),
687 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
689 ::testing::Range(0, 16), ::testing::Values(8)));
690 INSTANTIATE_TEST_SUITE_P(
691 SSE4_1
, CDEFBlockHighbdTest
,
692 ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncSse4_1
),
693 ::testing::ValuesIn(kCdefFilterHighbdFuncC
),
694 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
696 ::testing::Range(0, 16), ::testing::Range(10, 13, 2)));
697 INSTANTIATE_TEST_SUITE_P(SSE4_1
, CDEFFindDirTest
,
698 ::testing::Values(make_tuple(&cdef_find_dir_sse4_1
,
700 INSTANTIATE_TEST_SUITE_P(
701 SSE4_1
, CDEFFindDirDualTest
,
702 ::testing::Values(make_tuple(&cdef_find_dir_dual_sse4_1
,
703 &cdef_find_dir_dual_c
)));
705 INSTANTIATE_TEST_SUITE_P(
706 SSE4_1
, CDEFCopyRect8to16Test
,
707 ::testing::Values(make_tuple(&cdef_copy_rect8_8bit_to_16bit_c
,
708 &cdef_copy_rect8_8bit_to_16bit_sse4_1
)));
710 INSTANTIATE_TEST_SUITE_P(
711 SSE4_1
, CDEFCopyRect16to16Test
,
712 ::testing::Values(make_tuple(&cdef_copy_rect8_16bit_to_16bit_c
,
713 &cdef_copy_rect8_16bit_to_16bit_sse4_1
)));
717 static const CdefFilterBlockFunctions kCdefFilterFuncAvx2
[] = {
718 { &cdef_filter_8_0_avx2
, &cdef_filter_8_1_avx2
, &cdef_filter_8_2_avx2
,
719 &cdef_filter_8_3_avx2
}
722 static const CdefFilterBlockFunctions kCdefFilterHighbdFuncAvx2
[] = {
723 { &cdef_filter_16_0_avx2
, &cdef_filter_16_1_avx2
, &cdef_filter_16_2_avx2
,
724 &cdef_filter_16_3_avx2
}
727 INSTANTIATE_TEST_SUITE_P(
729 ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncAvx2
),
730 ::testing::ValuesIn(kCdefFilterFuncC
),
731 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
733 ::testing::Range(0, 16), ::testing::Values(8)));
734 INSTANTIATE_TEST_SUITE_P(
735 AVX2
, CDEFBlockHighbdTest
,
736 ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncAvx2
),
737 ::testing::ValuesIn(kCdefFilterHighbdFuncC
),
738 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
740 ::testing::Range(0, 16), ::testing::Range(10, 13, 2)));
741 INSTANTIATE_TEST_SUITE_P(AVX2
, CDEFFindDirTest
,
742 ::testing::Values(make_tuple(&cdef_find_dir_avx2
,
744 INSTANTIATE_TEST_SUITE_P(AVX2
, CDEFFindDirDualTest
,
745 ::testing::Values(make_tuple(&cdef_find_dir_dual_avx2
,
746 &cdef_find_dir_dual_c
)));
748 INSTANTIATE_TEST_SUITE_P(
749 AVX2
, CDEFCopyRect8to16Test
,
750 ::testing::Values(make_tuple(&cdef_copy_rect8_8bit_to_16bit_c
,
751 &cdef_copy_rect8_8bit_to_16bit_avx2
)));
753 INSTANTIATE_TEST_SUITE_P(
754 AVX2
, CDEFCopyRect16to16Test
,
755 ::testing::Values(make_tuple(&cdef_copy_rect8_16bit_to_16bit_c
,
756 &cdef_copy_rect8_16bit_to_16bit_avx2
)));
760 static const CdefFilterBlockFunctions kCdefFilterFuncNeon
[] = {
761 { &cdef_filter_8_0_neon
, &cdef_filter_8_1_neon
, &cdef_filter_8_2_neon
,
762 &cdef_filter_8_3_neon
}
765 static const CdefFilterBlockFunctions kCdefFilterHighbdFuncNeon
[] = {
766 { &cdef_filter_16_0_neon
, &cdef_filter_16_1_neon
, &cdef_filter_16_2_neon
,
767 &cdef_filter_16_3_neon
}
770 INSTANTIATE_TEST_SUITE_P(
772 ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncNeon
),
773 ::testing::ValuesIn(kCdefFilterFuncC
),
774 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
776 ::testing::Range(0, 16), ::testing::Values(8)));
777 INSTANTIATE_TEST_SUITE_P(
778 NEON
, CDEFBlockHighbdTest
,
779 ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncNeon
),
780 ::testing::ValuesIn(kCdefFilterHighbdFuncC
),
781 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
783 ::testing::Range(0, 16), ::testing::Range(10, 13, 2)));
784 INSTANTIATE_TEST_SUITE_P(NEON
, CDEFFindDirTest
,
785 ::testing::Values(make_tuple(&cdef_find_dir_neon
,
787 INSTANTIATE_TEST_SUITE_P(NEON
, CDEFFindDirDualTest
,
788 ::testing::Values(make_tuple(&cdef_find_dir_dual_neon
,
789 &cdef_find_dir_dual_c
)));
791 INSTANTIATE_TEST_SUITE_P(
792 NEON
, CDEFCopyRect8to16Test
,
793 ::testing::Values(make_tuple(&cdef_copy_rect8_8bit_to_16bit_c
,
794 &cdef_copy_rect8_8bit_to_16bit_neon
)));
796 INSTANTIATE_TEST_SUITE_P(
797 NEON
, CDEFCopyRect16to16Test
,
798 ::testing::Values(make_tuple(&cdef_copy_rect8_16bit_to_16bit_c
,
799 &cdef_copy_rect8_16bit_to_16bit_neon
)));
802 // Test speed for all supported architectures
803 #if AOM_ARCH_X86 && HAVE_SSSE3
804 INSTANTIATE_TEST_SUITE_P(
805 SSSE3
, CDEFSpeedTest
,
806 ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncSsse3
),
807 ::testing::ValuesIn(kCdefFilterFuncC
),
808 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
810 ::testing::Range(0, 16), ::testing::Values(8)));
811 INSTANTIATE_TEST_SUITE_P(
812 SSSE3
, CDEFSpeedHighbdTest
,
813 ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncSsse3
),
814 ::testing::ValuesIn(kCdefFilterHighbdFuncC
),
815 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
817 ::testing::Range(0, 16), ::testing::Values(10)));
818 INSTANTIATE_TEST_SUITE_P(SSSE3
, CDEFFindDirSpeedTest
,
819 ::testing::Values(make_tuple(&cdef_find_dir_ssse3
,
821 INSTANTIATE_TEST_SUITE_P(SSSE3
, CDEFFindDirDualSpeedTest
,
822 ::testing::Values(make_tuple(&cdef_find_dir_dual_ssse3
,
823 &cdef_find_dir_dual_c
)));
827 INSTANTIATE_TEST_SUITE_P(
828 SSE4_1
, CDEFSpeedTest
,
829 ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncSse4_1
),
830 ::testing::ValuesIn(kCdefFilterFuncC
),
831 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
833 ::testing::Range(0, 16), ::testing::Values(8)));
834 INSTANTIATE_TEST_SUITE_P(
835 SSE4_1
, CDEFSpeedHighbdTest
,
836 ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncSse4_1
),
837 ::testing::ValuesIn(kCdefFilterHighbdFuncC
),
838 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
840 ::testing::Range(0, 16), ::testing::Values(10)));
841 INSTANTIATE_TEST_SUITE_P(SSE4_1
, CDEFFindDirSpeedTest
,
842 ::testing::Values(make_tuple(&cdef_find_dir_sse4_1
,
844 INSTANTIATE_TEST_SUITE_P(
845 SSE4_1
, CDEFFindDirDualSpeedTest
,
846 ::testing::Values(make_tuple(&cdef_find_dir_dual_sse4_1
,
847 &cdef_find_dir_dual_c
)));
851 INSTANTIATE_TEST_SUITE_P(
853 ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncAvx2
),
854 ::testing::ValuesIn(kCdefFilterFuncC
),
855 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
857 ::testing::Range(0, 16), ::testing::Values(8)));
858 INSTANTIATE_TEST_SUITE_P(
859 AVX2
, CDEFSpeedHighbdTest
,
860 ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncAvx2
),
861 ::testing::ValuesIn(kCdefFilterHighbdFuncC
),
862 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
864 ::testing::Range(0, 16), ::testing::Values(10)));
865 INSTANTIATE_TEST_SUITE_P(AVX2
, CDEFFindDirSpeedTest
,
866 ::testing::Values(make_tuple(&cdef_find_dir_avx2
,
868 INSTANTIATE_TEST_SUITE_P(AVX2
, CDEFFindDirDualSpeedTest
,
869 ::testing::Values(make_tuple(&cdef_find_dir_dual_avx2
,
870 &cdef_find_dir_dual_c
)));
874 INSTANTIATE_TEST_SUITE_P(
876 ::testing::Combine(::testing::ValuesIn(kCdefFilterFuncNeon
),
877 ::testing::ValuesIn(kCdefFilterFuncC
),
878 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
880 ::testing::Range(0, 16), ::testing::Values(8)));
881 INSTANTIATE_TEST_SUITE_P(
882 NEON
, CDEFSpeedHighbdTest
,
883 ::testing::Combine(::testing::ValuesIn(kCdefFilterHighbdFuncNeon
),
884 ::testing::ValuesIn(kCdefFilterHighbdFuncC
),
885 ::testing::Values(BLOCK_4X4
, BLOCK_4X8
, BLOCK_8X4
,
887 ::testing::Range(0, 16), ::testing::Values(10)));
888 INSTANTIATE_TEST_SUITE_P(NEON
, CDEFFindDirSpeedTest
,
889 ::testing::Values(make_tuple(&cdef_find_dir_neon
,
891 INSTANTIATE_TEST_SUITE_P(NEON
, CDEFFindDirDualSpeedTest
,
892 ::testing::Values(make_tuple(&cdef_find_dir_dual_neon
,
893 &cdef_find_dir_dual_c
)));