cdef: Add cdef_filter_block_highbd().
[aom.git] / test / cdef_test.cc
blobe9616e09b69b55ab1e3e6c219d870560b3e1a586
1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
12 #include <cstdlib>
13 #include <string>
14 #include <tuple>
16 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
18 #include "config/aom_config.h"
19 #include "config/av1_rtcd.h"
21 #include "aom_ports/aom_timer.h"
22 #include "av1/common/cdef_block.h"
23 #include "test/acm_random.h"
24 #include "test/register_state_check.h"
25 #include "test/util.h"
27 using libaom_test::ACMRandom;
29 namespace {
31 typedef std::tuple<cdef_filter_block_func, cdef_filter_block_func, BLOCK_SIZE,
32 int, int>
33 cdef_dir_param_t;
35 class CDEFBlockTest : public ::testing::TestWithParam<cdef_dir_param_t> {
36 public:
37 virtual ~CDEFBlockTest() {}
38 virtual void SetUp() {
39 cdef = GET_PARAM(0);
40 ref_cdef = GET_PARAM(1);
41 bsize = GET_PARAM(2);
42 boundary = GET_PARAM(3);
43 depth = GET_PARAM(4);
46 virtual void TearDown() {}
48 protected:
49 int bsize;
50 int boundary;
51 int depth;
52 cdef_filter_block_func cdef;
53 cdef_filter_block_func ref_cdef;
55 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFBlockTest);
57 typedef CDEFBlockTest CDEFBlockHighbdTest;
58 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFBlockHighbdTest);
60 typedef CDEFBlockTest CDEFSpeedTest;
61 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFSpeedTest);
// Runs ref_cdef (and, when it is a different function, cdef) over randomly
// generated source blocks for every combination of damping, signal level,
// noise width, direction and strength swept below, and reports the first
// output element at which the two results differ.
//   bsize      - block size constant, forwarded unchanged to both filters.
//   iterations - number of times the level/bits sweep is repeated.
//   cdef       - implementation under test. Passing the same pointer as
//                ref_cdef skips both the second call and the comparison, so
//                only the filter work remains (used for timing).
//   ref_cdef   - reference implementation.
//   boundary   - bit mask: 1 = left, 2 = right, 4 = above, 8 = below. The
//                selected borders of the source are overwritten with
//                CDEF_VERY_LARGE. Any non-zero value also coarsens the damping,
//                level, bits and pristrength sweeps.
//   depth      - sample bit depth; depth - 8 is passed as the filters' last
//                argument.
63 void test_cdef(int bsize, int iterations, cdef_filter_block_func cdef,
64 cdef_filter_block_func ref_cdef, int boundary, int depth) {
65 const int size = 8;
66 const int ysize = size + 2 * CDEF_VBORDER;
67 ACMRandom rnd(ACMRandom::DeterministicSeed());
// s is the bordered source; the filters are handed a pointer to its interior.
// d and ref_d are static, so they are cleared explicitly on every call.
68 DECLARE_ALIGNED(16, uint16_t, s[ysize * CDEF_BSTRIDE]);
69 DECLARE_ALIGNED(16, static uint16_t, d[size * size]);
70 DECLARE_ALIGNED(16, static uint16_t, ref_d[size * size]);
71 memset(ref_d, 0, sizeof(ref_d));
72 memset(d, 0, sizeof(d));
74 int error = 0, pristrength = 0, secstrength, dir;
75 int pridamping, secdamping, bits, level, count,
76 errdepth = 0, errpristrength = 0, errsecstrength = 0, errboundary = 0,
77 errpridamping = 0, errsecdamping = 0;
78 unsigned int pos = 0;
// Number of uint16_t elements compared after each call: all size * size of
// them for depth > 8, half that many for depth == 8.
// NOTE(review): presumably the 8-bit filters store one byte per pixel, so the
// block fills only half of the uint16_t buffer - confirm against the filter.
80 const unsigned int max_pos = size * size >> static_cast<int>(depth == 8);
81 for (pridamping = 3 + depth - 8; pridamping < 7 - 3 * !!boundary + depth - 8;
82 pridamping++) {
83 for (secdamping = 3 + depth - 8;
84 secdamping < 7 - 3 * !!boundary + depth - 8; secdamping++) {
85 for (count = 0; count < iterations; count++) {
86 for (level = 0; level < (1 << depth) && !error;
87 level += (2 + 6 * !!boundary) << (depth - 8)) {
88 for (bits = 1; bits <= depth && !error; bits += 1 + 3 * !!boundary) {
// Source samples: `bits` bits of random noise added to `level`, clamped to
// the valid range for `depth`.
89 for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++)
90 s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
91 (1 << depth) - 1);
// Overwrite each border selected in the boundary mask with CDEF_VERY_LARGE.
92 if (boundary) {
93 if (boundary & 1) { // Left
94 for (int i = 0; i < ysize; i++)
95 for (int j = 0; j < CDEF_HBORDER; j++)
96 s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
98 if (boundary & 2) { // Right
99 for (int i = 0; i < ysize; i++)
100 for (int j = CDEF_HBORDER + size; j < CDEF_BSTRIDE; j++)
101 s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
103 if (boundary & 4) { // Above
104 for (int i = 0; i < CDEF_VBORDER; i++)
105 for (int j = 0; j < CDEF_BSTRIDE; j++)
106 s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
108 if (boundary & 8) { // Below
109 for (int i = CDEF_VBORDER + size; i < ysize; i++)
110 for (int j = 0; j < CDEF_BSTRIDE; j++)
111 s[i * CDEF_BSTRIDE + j] = CDEF_VERY_LARGE;
114 for (dir = 0; dir < 8; dir++) {
115 for (pristrength = 0; pristrength <= 19 << (depth - 8) && !error;
116 pristrength += (1 + 4 * !!boundary) << (depth - 8)) {
// On reaching 16 the loop variable itself is bumped to 19, so at depth 8 with
// unit steps the values exercised are 0..15 and 19.
// NOTE(review): unlike the loop bound and step, the constants 16 and 19 are
// not scaled by depth - 8 - confirm this is intended for depth > 8.
117 if (pristrength == 16) pristrength = 19;
118 for (secstrength = 0; secstrength <= 4 << (depth - 8) && !error;
119 secstrength += 1 << (depth - 8)) {
// The scaled secondary strength 3 is skipped and never passed to the filters.
120 if (secstrength == 3 << (depth - 8)) continue;
121 ref_cdef(ref_d, size,
122 s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE,
123 pristrength, secstrength, dir, pridamping,
124 secdamping, bsize, depth - 8);
125 // If cdef and ref_cdef are the same, we're just testing
126 // speed
127 if (cdef != ref_cdef)
128 API_REGISTER_STATE_CHECK(cdef(
129 d, size, s + CDEF_HBORDER + CDEF_VBORDER * CDEF_BSTRIDE,
130 pristrength, secstrength, dir, pridamping, secdamping,
131 bsize, depth - 8));
// Compare element by element and stop at the first difference. The err*
// values are rewritten on every pass and are only reported once error is set,
// at which point they hold the parameters of the failing call.
132 if (ref_cdef != cdef) {
133 for (pos = 0; pos < max_pos && !error; pos++) {
134 error = ref_d[pos] != d[pos];
135 errdepth = depth;
136 errpristrength = pristrength;
137 errsecstrength = secstrength;
138 errboundary = boundary;
139 errpridamping = pridamping;
140 errsecdamping = secdamping;
// The comparison loop increments pos once more after recording a mismatch,
// so step back to the index that actually differed.
152 pos--;
153 EXPECT_EQ(0, error) << "Error: CDEFBlockTest, SIMD and C mismatch."
154 << std::endl
155 << "First error at " << pos % size << "," << pos / size
156 << " (" << (int16_t)ref_d[pos] << " : " << (int16_t)d[pos]
157 << ") " << std::endl
158 << "pristrength: " << errpristrength << std::endl
159 << "pridamping: " << errpridamping << std::endl
160 << "secstrength: " << errsecstrength << std::endl
161 << "secdamping: " << errsecdamping << std::endl
162 << "depth: " << errdepth << std::endl
163 << "size: " << bsize << std::endl
164 << "boundary: " << errboundary << std::endl
165 << std::endl;
168 void test_cdef_speed(int bsize, int iterations, cdef_filter_block_func cdef,
169 cdef_filter_block_func ref_cdef, int boundary, int depth) {
170 aom_usec_timer ref_timer;
171 aom_usec_timer timer;
173 aom_usec_timer_start(&ref_timer);
174 test_cdef(bsize, iterations, ref_cdef, ref_cdef, boundary, depth);
175 aom_usec_timer_mark(&ref_timer);
176 int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
178 aom_usec_timer_start(&timer);
179 test_cdef(bsize, iterations, cdef, cdef, boundary, depth);
180 aom_usec_timer_mark(&timer);
181 int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
183 EXPECT_GT(ref_elapsed_time, elapsed_time)
184 << "Error: CDEFSpeedTest, SIMD slower than C." << std::endl
185 << "C time: " << ref_elapsed_time << " us" << std::endl
186 << "SIMD time: " << elapsed_time << " us" << std::endl;
// Signature shared by the direction-search implementations under test.
using find_dir_t = int (*)(const uint16_t *img, int stride, int32_t *var,
                           int coeff_shift);

// Test parameter: (implementation under test, reference implementation).
using find_dir_param_t = std::tuple<find_dir_t, find_dir_t>;
194 class CDEFFindDirTest : public ::testing::TestWithParam<find_dir_param_t> {
195 public:
196 virtual ~CDEFFindDirTest() {}
197 virtual void SetUp() {
198 finddir = GET_PARAM(0);
199 ref_finddir = GET_PARAM(1);
202 virtual void TearDown() {}
204 protected:
205 find_dir_t finddir;
206 find_dir_t ref_finddir;
208 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFFindDirTest);
210 typedef CDEFFindDirTest CDEFFindDirSpeedTest;
211 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CDEFFindDirSpeedTest);
213 void test_finddir(int (*finddir)(const uint16_t *img, int stride, int32_t *var,
214 int coeff_shift),
215 int (*ref_finddir)(const uint16_t *img, int stride,
216 int32_t *var, int coeff_shift)) {
217 const int size = 8;
218 ACMRandom rnd(ACMRandom::DeterministicSeed());
219 DECLARE_ALIGNED(16, uint16_t, s[size * size]);
221 int error = 0;
222 int depth, bits, level, count, errdepth = 0;
223 int ref_res = 0, res = 0;
224 int32_t ref_var = 0, var = 0;
226 for (depth = 8; depth <= 12 && !error; depth += 2) {
227 for (count = 0; count < 512 && !error; count++) {
228 for (level = 0; level < (1 << depth) && !error;
229 level += 1 << (depth - 8)) {
230 for (bits = 1; bits <= depth && !error; bits++) {
231 for (unsigned int i = 0; i < sizeof(s) / sizeof(*s); i++)
232 s[i] = clamp((rnd.Rand16() & ((1 << bits) - 1)) + level, 0,
233 (1 << depth) - 1);
234 for (int c = 0; c < 1 + 9 * (finddir == ref_finddir); c++)
235 ref_res = ref_finddir(s, size, &ref_var, depth - 8);
236 if (finddir != ref_finddir)
237 API_REGISTER_STATE_CHECK(res = finddir(s, size, &var, depth - 8));
238 if (ref_finddir != finddir) {
239 if (res != ref_res || var != ref_var) error = 1;
240 errdepth = depth;
247 EXPECT_EQ(0, error) << "Error: CDEFFindDirTest, SIMD and C mismatch."
248 << std::endl
249 << "return: " << res << " : " << ref_res << std::endl
250 << "var: " << var << " : " << ref_var << std::endl
251 << "depth: " << errdepth << std::endl
252 << std::endl;
255 void test_finddir_speed(int (*finddir)(const uint16_t *img, int stride,
256 int32_t *var, int coeff_shift),
257 int (*ref_finddir)(const uint16_t *img, int stride,
258 int32_t *var, int coeff_shift)) {
259 aom_usec_timer ref_timer;
260 aom_usec_timer timer;
262 aom_usec_timer_start(&ref_timer);
263 test_finddir(ref_finddir, ref_finddir);
264 aom_usec_timer_mark(&ref_timer);
265 int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
267 aom_usec_timer_start(&timer);
268 test_finddir(finddir, finddir);
269 aom_usec_timer_mark(&timer);
270 int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
272 EXPECT_GT(ref_elapsed_time, elapsed_time)
273 << "Error: CDEFFindDirSpeedTest, SIMD slower than C." << std::endl
274 << "C time: " << ref_elapsed_time << " us" << std::endl
275 << "SIMD time: " << elapsed_time << " us" << std::endl;
// Block filter, optimized vs. reference: one pass of the test_cdef() sweep
// with the fixture's parameters.
278 TEST_P(CDEFBlockTest, TestSIMDNoMismatch) {
279 test_cdef(bsize, 1, cdef, ref_cdef, boundary, depth);
// Same check, registered under the high-bitdepth suite name so it can be
// instantiated with the highbd filters and depths.
282 TEST_P(CDEFBlockHighbdTest, TestSIMDHighbdNoMismatch) {
283 test_cdef(bsize, 1, cdef, ref_cdef, boundary, depth);
// Block filter timing with four passes of the sweep. The DISABLED_ prefix
// keeps it out of normal test runs.
286 TEST_P(CDEFSpeedTest, DISABLED_TestSpeed) {
287 test_cdef_speed(bsize, 4, cdef, ref_cdef, boundary, depth);
// Direction search, optimized vs. reference.
290 TEST_P(CDEFFindDirTest, TestSIMDNoMismatch) {
291 test_finddir(finddir, ref_finddir);
// Direction search timing. The DISABLED_ prefix keeps it out of normal test
// runs.
294 TEST_P(CDEFFindDirSpeedTest, DISABLED_TestSpeed) {
295 test_finddir_speed(finddir, ref_finddir);
// Correctness instantiations. Every group follows the same pattern:
//  - CDEFBlockTest: optimized cdef_filter_block_* against cdef_filter_block_c
//    for the four block sizes, boundary masks 0..15 (Range(0, 16)) and depth 8.
//  - CDEFBlockHighbdTest (only when CONFIG_AV1_HIGHBITDEPTH is set): the
//    *_highbd_* filters against cdef_filter_block_highbd_c with
//    Range(10, 13, 2), i.e. depths 10 and 12.
//  - CDEFFindDirTest: optimized cdef_find_dir_* against cdef_find_dir_c.
298 using std::make_tuple;
300 // VS compiling for 32 bit targets does not support vector types in
301 // structs as arguments, which makes the v256 type of the intrinsics
302 // hard to support, so optimizations for this target are disabled.
303 #if defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)
// SSE2
304 #if HAVE_SSE2
305 INSTANTIATE_TEST_SUITE_P(
306 SSE2, CDEFBlockTest,
307 ::testing::Combine(::testing::Values(&cdef_filter_block_sse2),
308 ::testing::Values(&cdef_filter_block_c),
309 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
310 BLOCK_8X8),
311 ::testing::Range(0, 16), ::testing::Values(8)));
312 #if CONFIG_AV1_HIGHBITDEPTH
313 INSTANTIATE_TEST_SUITE_P(
314 SSE2, CDEFBlockHighbdTest,
315 ::testing::Combine(::testing::Values(&cdef_filter_block_highbd_sse2),
316 ::testing::Values(&cdef_filter_block_highbd_c),
317 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
318 BLOCK_8X8),
319 ::testing::Range(0, 16), ::testing::Range(10, 13, 2)));
320 #endif
321 INSTANTIATE_TEST_SUITE_P(SSE2, CDEFFindDirTest,
322 ::testing::Values(make_tuple(&cdef_find_dir_sse2,
323 &cdef_find_dir_c)));
324 #endif
// SSSE3
325 #if HAVE_SSSE3
326 INSTANTIATE_TEST_SUITE_P(
327 SSSE3, CDEFBlockTest,
328 ::testing::Combine(::testing::Values(&cdef_filter_block_ssse3),
329 ::testing::Values(&cdef_filter_block_c),
330 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
331 BLOCK_8X8),
332 ::testing::Range(0, 16), ::testing::Values(8)));
333 #if CONFIG_AV1_HIGHBITDEPTH
334 INSTANTIATE_TEST_SUITE_P(
335 SSSE3, CDEFBlockHighbdTest,
336 ::testing::Combine(::testing::Values(&cdef_filter_block_highbd_ssse3),
337 ::testing::Values(&cdef_filter_block_highbd_c),
338 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
339 BLOCK_8X8),
340 ::testing::Range(0, 16), ::testing::Range(10, 13, 2)));
341 #endif
342 INSTANTIATE_TEST_SUITE_P(SSSE3, CDEFFindDirTest,
343 ::testing::Values(make_tuple(&cdef_find_dir_ssse3,
344 &cdef_find_dir_c)));
345 #endif
// SSE4.1
347 #if HAVE_SSE4_1
348 INSTANTIATE_TEST_SUITE_P(
349 SSE4_1, CDEFBlockTest,
350 ::testing::Combine(::testing::Values(&cdef_filter_block_sse4_1),
351 ::testing::Values(&cdef_filter_block_c),
352 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
353 BLOCK_8X8),
354 ::testing::Range(0, 16), ::testing::Values(8)));
355 #if CONFIG_AV1_HIGHBITDEPTH
356 INSTANTIATE_TEST_SUITE_P(
357 SSE4_1, CDEFBlockHighbdTest,
358 ::testing::Combine(::testing::Values(&cdef_filter_block_highbd_sse4_1),
359 ::testing::Values(&cdef_filter_block_highbd_c),
360 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
361 BLOCK_8X8),
362 ::testing::Range(0, 16), ::testing::Range(10, 13, 2)));
363 #endif
364 INSTANTIATE_TEST_SUITE_P(SSE4_1, CDEFFindDirTest,
365 ::testing::Values(make_tuple(&cdef_find_dir_sse4_1,
366 &cdef_find_dir_c)));
367 #endif
// AVX2
369 #if HAVE_AVX2
370 INSTANTIATE_TEST_SUITE_P(
371 AVX2, CDEFBlockTest,
372 ::testing::Combine(::testing::Values(&cdef_filter_block_avx2),
373 ::testing::Values(&cdef_filter_block_c),
374 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
375 BLOCK_8X8),
376 ::testing::Range(0, 16), ::testing::Values(8)));
377 #if CONFIG_AV1_HIGHBITDEPTH
378 INSTANTIATE_TEST_SUITE_P(
379 AVX2, CDEFBlockHighbdTest,
380 ::testing::Combine(::testing::Values(&cdef_filter_block_highbd_avx2),
381 ::testing::Values(&cdef_filter_block_highbd_c),
382 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
383 BLOCK_8X8),
384 ::testing::Range(0, 16), ::testing::Range(10, 13, 2)));
385 #endif
386 INSTANTIATE_TEST_SUITE_P(AVX2, CDEFFindDirTest,
387 ::testing::Values(make_tuple(&cdef_find_dir_avx2,
388 &cdef_find_dir_c)));
389 #endif
// NEON
391 #if HAVE_NEON
392 INSTANTIATE_TEST_SUITE_P(
393 NEON, CDEFBlockTest,
394 ::testing::Combine(::testing::Values(&cdef_filter_block_neon),
395 ::testing::Values(&cdef_filter_block_c),
396 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
397 BLOCK_8X8),
398 ::testing::Range(0, 16), ::testing::Values(8)));
399 #if CONFIG_AV1_HIGHBITDEPTH
400 INSTANTIATE_TEST_SUITE_P(
401 NEON, CDEFBlockHighbdTest,
402 ::testing::Combine(::testing::Values(&cdef_filter_block_highbd_neon),
403 ::testing::Values(&cdef_filter_block_highbd_c),
404 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
405 BLOCK_8X8),
406 ::testing::Range(0, 16), ::testing::Range(10, 13, 2)));
407 #endif
408 INSTANTIATE_TEST_SUITE_P(NEON, CDEFFindDirTest,
409 ::testing::Values(make_tuple(&cdef_find_dir_neon,
410 &cdef_find_dir_c)));
411 #endif
// Speed instantiations. Each group pairs the optimized 8-bit block filter and
// direction search with the C versions, using the same block sizes and
// boundary masks as the correctness suites at depth 8. No high-bitdepth
// filter is instantiated for the speed suites in this section.
413 // Test speed for all supported architectures
414 #if HAVE_SSE2
415 INSTANTIATE_TEST_SUITE_P(
416 SSE2, CDEFSpeedTest,
417 ::testing::Combine(::testing::Values(&cdef_filter_block_sse2),
418 ::testing::Values(&cdef_filter_block_c),
419 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
420 BLOCK_8X8),
421 ::testing::Range(0, 16), ::testing::Values(8)));
422 INSTANTIATE_TEST_SUITE_P(SSE2, CDEFFindDirSpeedTest,
423 ::testing::Values(make_tuple(&cdef_find_dir_sse2,
424 &cdef_find_dir_c)));
425 #endif
// SSSE3
427 #if HAVE_SSSE3
428 INSTANTIATE_TEST_SUITE_P(
429 SSSE3, CDEFSpeedTest,
430 ::testing::Combine(::testing::Values(&cdef_filter_block_ssse3),
431 ::testing::Values(&cdef_filter_block_c),
432 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
433 BLOCK_8X8),
434 ::testing::Range(0, 16), ::testing::Values(8)));
435 INSTANTIATE_TEST_SUITE_P(SSSE3, CDEFFindDirSpeedTest,
436 ::testing::Values(make_tuple(&cdef_find_dir_ssse3,
437 &cdef_find_dir_c)));
438 #endif
// SSE4.1
440 #if HAVE_SSE4_1
441 INSTANTIATE_TEST_SUITE_P(
442 SSE4_1, CDEFSpeedTest,
443 ::testing::Combine(::testing::Values(&cdef_filter_block_sse4_1),
444 ::testing::Values(&cdef_filter_block_c),
445 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
446 BLOCK_8X8),
447 ::testing::Range(0, 16), ::testing::Values(8)));
448 INSTANTIATE_TEST_SUITE_P(SSE4_1, CDEFFindDirSpeedTest,
449 ::testing::Values(make_tuple(&cdef_find_dir_sse4_1,
450 &cdef_find_dir_c)));
451 #endif
// AVX2
453 #if HAVE_AVX2
454 INSTANTIATE_TEST_SUITE_P(
455 AVX2, CDEFSpeedTest,
456 ::testing::Combine(::testing::Values(&cdef_filter_block_avx2),
457 ::testing::Values(&cdef_filter_block_c),
458 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
459 BLOCK_8X8),
460 ::testing::Range(0, 16), ::testing::Values(8)));
461 INSTANTIATE_TEST_SUITE_P(AVX2, CDEFFindDirSpeedTest,
462 ::testing::Values(make_tuple(&cdef_find_dir_avx2,
463 &cdef_find_dir_c)));
464 #endif
// NEON
466 #if HAVE_NEON
467 INSTANTIATE_TEST_SUITE_P(
468 NEON, CDEFSpeedTest,
469 ::testing::Combine(::testing::Values(&cdef_filter_block_neon),
470 ::testing::Values(&cdef_filter_block_c),
471 ::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4,
472 BLOCK_8X8),
473 ::testing::Range(0, 16), ::testing::Values(8)));
474 INSTANTIATE_TEST_SUITE_P(NEON, CDEFFindDirSpeedTest,
475 ::testing::Values(make_tuple(&cdef_find_dir_neon,
476 &cdef_find_dir_c)));
477 #endif
479 #endif // defined(_WIN64) || !defined(_MSC_VER) || defined(__clang__)
480 } // namespace