2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
17 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
19 #include "config/aom_config.h"
20 #include "config/aom_dsp_rtcd.h"
22 #include "aom_ports/mem.h"
23 #include "av1/common/common_data.h"
24 #include "test/acm_random.h"
25 #include "test/register_state_check.h"
26 #include "test/util.h"
27 #include "test/function_equivalence_test.h"
29 using libaom_test::ACMRandom
;
30 using libaom_test::FunctionEquivalenceTest
;
31 using ::testing::Combine
;
32 using ::testing::Range
;
33 using ::testing::Values
;
34 using ::testing::ValuesIn
;
// Iteration count for the randomized RunTest() loops in this file.
constexpr int kNumIterations = 10000;

// 2^12 - 1: the largest magnitude used for the "13-bit" signed test samples.
static constexpr int16_t kInt13Max = (1 << 12) - 1;
41 typedef uint64_t (*SSI16Func
)(const int16_t *src
, int stride
, int width
,
43 typedef libaom_test::FuncParam
<SSI16Func
> TestFuncs
;
45 class SumSquaresTest
: public ::testing::TestWithParam
<TestFuncs
> {
47 ~SumSquaresTest() override
= default;
48 void SetUp() override
{
49 params_
= this->GetParam();
50 rnd_
.Reset(ACMRandom::DeterministicSeed());
51 src_
= reinterpret_cast<int16_t *>(aom_memalign(16, 256 * 256 * 2));
52 ASSERT_NE(src_
, nullptr);
55 void TearDown() override
{ aom_free(src_
); }
56 void RunTest(bool is_random
);
59 void GenRandomData(int width
, int height
, int stride
) {
60 const int msb
= 11; // Up to 12 bit input
61 const int limit
= 1 << (msb
+ 1);
62 for (int ii
= 0; ii
< height
; ii
++) {
63 for (int jj
= 0; jj
< width
; jj
++) {
64 src_
[ii
* stride
+ jj
] = rnd_(2) ? rnd_(limit
) : -rnd_(limit
);
69 void GenExtremeData(int width
, int height
, int stride
) {
70 const int msb
= 11; // Up to 12 bit input
71 const int limit
= 1 << (msb
+ 1);
72 const int val
= rnd_(2) ? limit
- 1 : -(limit
- 1);
73 for (int ii
= 0; ii
< height
; ii
++) {
74 for (int jj
= 0; jj
< width
; jj
++) {
75 src_
[ii
* stride
+ jj
] = val
;
85 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SumSquaresTest
);
87 void SumSquaresTest::RunTest(bool is_random
) {
89 for (int k
= 0; k
< kNumIterations
; k
++) {
90 const int width
= 4 * (rnd_(31) + 1); // Up to 128x128
91 const int height
= 4 * (rnd_(31) + 1); // Up to 128x128
92 int stride
= 4 << rnd_(7); // Up to 256 stride
93 while (stride
< width
) { // Make sure it's valid
94 stride
= 4 << rnd_(7);
97 GenRandomData(width
, height
, stride
);
99 GenExtremeData(width
, height
, stride
);
101 const uint64_t res_ref
= params_
.ref_func(src_
, stride
, width
, height
);
103 API_REGISTER_STATE_CHECK(res_tst
=
104 params_
.tst_func(src_
, stride
, width
, height
));
107 failed
= res_ref
!= res_tst
;
108 EXPECT_EQ(res_ref
, res_tst
)
109 << "Error: Sum Squares Test [" << width
<< "x" << height
110 << "] C output does not match optimized output.";
115 void SumSquaresTest::RunSpeedTest() {
116 for (int block
= BLOCK_4X4
; block
< BLOCK_SIZES_ALL
; block
++) {
117 const int width
= block_size_wide
[block
]; // Up to 128x128
118 const int height
= block_size_high
[block
]; // Up to 128x128
119 int stride
= 4 << rnd_(7); // Up to 256 stride
120 while (stride
< width
) { // Make sure it's valid
121 stride
= 4 << rnd_(7);
123 GenExtremeData(width
, height
, stride
);
124 const int num_loops
= 1000000000 / (width
+ height
);
125 aom_usec_timer timer
;
126 aom_usec_timer_start(&timer
);
128 for (int i
= 0; i
< num_loops
; ++i
)
129 params_
.ref_func(src_
, stride
, width
, height
);
131 aom_usec_timer_mark(&timer
);
132 const int elapsed_time
= static_cast<int>(aom_usec_timer_elapsed(&timer
));
133 printf("SumSquaresTest C %3dx%-3d: %7.2f ns\n", width
, height
,
134 1000.0 * elapsed_time
/ num_loops
);
136 aom_usec_timer timer1
;
137 aom_usec_timer_start(&timer1
);
138 for (int i
= 0; i
< num_loops
; ++i
)
139 params_
.tst_func(src_
, stride
, width
, height
);
140 aom_usec_timer_mark(&timer1
);
141 const int elapsed_time1
= static_cast<int>(aom_usec_timer_elapsed(&timer1
));
142 printf("SumSquaresTest Test %3dx%-3d: %7.2f ns\n", width
, height
,
143 1000.0 * elapsed_time1
/ num_loops
);
147 TEST_P(SumSquaresTest
, OperationCheck
) {
148 RunTest(true); // GenRandomData
151 TEST_P(SumSquaresTest
, ExtremeValues
) {
152 RunTest(false); // GenExtremeData
155 TEST_P(SumSquaresTest
, DISABLED_Speed
) { RunSpeedTest(); }
// One instantiation per SIMD flavour. Each is compiled only when the matching
// HAVE_* macro is set, so a build never references a kernel that does not
// exist for the target (same guard style as HAVE_NEON_DOTPROD / HAVE_SSE4_1
// elsewhere in this file).
#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(
    SSE2, SumSquaresTest,
    ::testing::Values(TestFuncs(&aom_sum_squares_2d_i16_c,
                                &aom_sum_squares_2d_i16_sse2)));
#endif  // HAVE_SSE2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, SumSquaresTest,
    ::testing::Values(TestFuncs(&aom_sum_squares_2d_i16_c,
                                &aom_sum_squares_2d_i16_neon)));
#endif  // HAVE_NEON

#if HAVE_SVE
INSTANTIATE_TEST_SUITE_P(
    SVE, SumSquaresTest,
    ::testing::Values(TestFuncs(&aom_sum_squares_2d_i16_c,
                                &aom_sum_squares_2d_i16_sve)));
#endif  // HAVE_SVE

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, SumSquaresTest,
    ::testing::Values(TestFuncs(&aom_sum_squares_2d_i16_c,
                                &aom_sum_squares_2d_i16_avx2)));
#endif  // HAVE_AVX2
//////////////////////////////////////////////////////////////////////////////
// 1D sum of squares test functions
//////////////////////////////////////////////////////////////////////////////
194 typedef uint64_t (*F1D
)(const int16_t *src
, uint32_t n
);
195 typedef libaom_test::FuncParam
<F1D
> TestFuncs1D
;
197 class SumSquares1DTest
: public FunctionEquivalenceTest
<F1D
> {
199 static const int kIterations
= 1000;
200 static const int kMaxSize
= 256;
202 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SumSquares1DTest
);
204 TEST_P(SumSquares1DTest
, RandomValues
) {
205 DECLARE_ALIGNED(16, int16_t, src
[kMaxSize
* kMaxSize
]);
207 for (int iter
= 0; iter
< kIterations
&& !HasFatalFailure(); ++iter
) {
208 for (int i
= 0; i
< kMaxSize
* kMaxSize
; ++i
)
209 src
[i
] = rng_(kInt13Max
* 2 + 1) - kInt13Max
;
211 // Block size is between 64 and 128 * 128 and is always a multiple of 64.
212 const int n
= (rng_(255) + 1) * 64;
214 const uint64_t ref_res
= params_
.ref_func(src
, n
);
216 API_REGISTER_STATE_CHECK(tst_res
= params_
.tst_func(src
, n
));
218 ASSERT_EQ(ref_res
, tst_res
);
222 TEST_P(SumSquares1DTest
, ExtremeValues
) {
223 DECLARE_ALIGNED(16, int16_t, src
[kMaxSize
* kMaxSize
]);
225 for (int iter
= 0; iter
< kIterations
&& !HasFatalFailure(); ++iter
) {
227 for (int i
= 0; i
< kMaxSize
* kMaxSize
; ++i
) src
[i
] = kInt13Max
;
229 for (int i
= 0; i
< kMaxSize
* kMaxSize
; ++i
) src
[i
] = -kInt13Max
;
232 // Block size is between 64 and 128 * 128 and is always a multiple of 64.
233 const int n
= (rng_(255) + 1) * 64;
235 const uint64_t ref_res
= params_
.ref_func(src
, n
);
237 API_REGISTER_STATE_CHECK(tst_res
= params_
.tst_func(src
, n
));
239 ASSERT_EQ(ref_res
, tst_res
);
// 1D kernel instantiations, each compiled only for targets that provide the
// corresponding SIMD implementation.
#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, SumSquares1DTest,
                         ::testing::Values(TestFuncs1D(
                             aom_sum_squares_i16_c, aom_sum_squares_i16_sse2)));
#endif  // HAVE_SSE2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, SumSquares1DTest,
                         ::testing::Values(TestFuncs1D(
                             aom_sum_squares_i16_c, aom_sum_squares_i16_neon)));
#endif  // HAVE_NEON

#if HAVE_SVE
INSTANTIATE_TEST_SUITE_P(SVE, SumSquares1DTest,
                         ::testing::Values(TestFuncs1D(
                             aom_sum_squares_i16_c, aom_sum_squares_i16_sve)));
#endif  // HAVE_SVE
264 typedef int64_t (*SSEFunc
)(const uint8_t *a
, int a_stride
, const uint8_t *b
,
265 int b_stride
, int width
, int height
);
266 typedef libaom_test::FuncParam
<SSEFunc
> TestSSEFuncs
;
268 typedef std::tuple
<TestSSEFuncs
, int> SSETestParam
;
270 class SSETest
: public ::testing::TestWithParam
<SSETestParam
> {
272 ~SSETest() override
= default;
273 void SetUp() override
{
274 params_
= GET_PARAM(0);
275 width_
= GET_PARAM(1);
277 #if CONFIG_AV1_HIGHBITDEPTH
278 params_
.ref_func
== aom_highbd_sse_c
;
282 rnd_
.Reset(ACMRandom::DeterministicSeed());
283 src_
= reinterpret_cast<uint8_t *>(aom_memalign(32, 256 * 256 * 2));
284 ref_
= reinterpret_cast<uint8_t *>(aom_memalign(32, 256 * 256 * 2));
285 ASSERT_NE(src_
, nullptr);
286 ASSERT_NE(ref_
, nullptr);
289 void TearDown() override
{
293 void RunTest(bool is_random
, int width
, int height
, int run_times
);
295 void GenRandomData(int width
, int height
, int stride
) {
296 uint16_t *src16
= reinterpret_cast<uint16_t *>(src_
);
297 uint16_t *ref16
= reinterpret_cast<uint16_t *>(ref_
);
298 const int msb
= 11; // Up to 12 bit input
299 const int limit
= 1 << (msb
+ 1);
300 for (int ii
= 0; ii
< height
; ii
++) {
301 for (int jj
= 0; jj
< width
; jj
++) {
303 src_
[ii
* stride
+ jj
] = rnd_
.Rand8();
304 ref_
[ii
* stride
+ jj
] = rnd_
.Rand8();
306 src16
[ii
* stride
+ jj
] = rnd_(limit
);
307 ref16
[ii
* stride
+ jj
] = rnd_(limit
);
313 void GenExtremeData(int width
, int height
, int stride
, uint8_t *data
,
315 uint16_t *data16
= reinterpret_cast<uint16_t *>(data
);
316 for (int ii
= 0; ii
< height
; ii
++) {
317 for (int jj
= 0; jj
< width
; jj
++) {
319 data
[ii
* stride
+ jj
] = static_cast<uint8_t>(val
);
321 data16
[ii
* stride
+ jj
] = val
;
330 TestSSEFuncs params_
;
335 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SSETest
);
337 void SSETest::RunTest(bool is_random
, int width
, int height
, int run_times
) {
339 aom_usec_timer ref_timer
, test_timer
;
340 for (int k
= 0; k
< 3; k
++) {
341 int stride
= 4 << rnd_(7); // Up to 256 stride
342 while (stride
< width
) { // Make sure it's valid
343 stride
= 4 << rnd_(7);
346 GenRandomData(width
, height
, stride
);
348 const int msb
= is_hbd_
? 12 : 8; // Up to 12 bit input
349 const int limit
= (1 << msb
) - 1;
351 GenExtremeData(width
, height
, stride
, src_
, 0);
352 GenExtremeData(width
, height
, stride
, ref_
, limit
);
354 GenExtremeData(width
, height
, stride
, src_
, limit
);
355 GenExtremeData(width
, height
, stride
, ref_
, 0);
358 int64_t res_ref
, res_tst
;
362 src
= CONVERT_TO_BYTEPTR(src_
);
363 ref
= CONVERT_TO_BYTEPTR(ref_
);
365 res_ref
= params_
.ref_func(src
, stride
, ref
, stride
, width
, height
);
366 res_tst
= params_
.tst_func(src
, stride
, ref
, stride
, width
, height
);
368 aom_usec_timer_start(&ref_timer
);
369 for (int j
= 0; j
< run_times
; j
++) {
370 params_
.ref_func(src
, stride
, ref
, stride
, width
, height
);
372 aom_usec_timer_mark(&ref_timer
);
373 const int elapsed_time_c
=
374 static_cast<int>(aom_usec_timer_elapsed(&ref_timer
));
376 aom_usec_timer_start(&test_timer
);
377 for (int j
= 0; j
< run_times
; j
++) {
378 params_
.tst_func(src
, stride
, ref
, stride
, width
, height
);
380 aom_usec_timer_mark(&test_timer
);
381 const int elapsed_time_simd
=
382 static_cast<int>(aom_usec_timer_elapsed(&test_timer
));
385 "c_time=%d \t simd_time=%d \t "
387 elapsed_time_c
, elapsed_time_simd
,
388 (elapsed_time_c
/ elapsed_time_simd
));
391 failed
= res_ref
!= res_tst
;
392 EXPECT_EQ(res_ref
, res_tst
)
393 << "Error:" << (is_hbd_
? "hbd " : " ") << k
<< " SSE Test ["
394 << width
<< "x" << height
395 << "] C output does not match optimized output.";
401 TEST_P(SSETest
, OperationCheck
) {
402 for (int height
= 4; height
<= 128; height
+= 4) {
403 RunTest(true, width_
, height
, 1); // GenRandomData
407 TEST_P(SSETest
, ExtremeValues
) {
408 for (int height
= 4; height
<= 128; height
+= 4) {
409 RunTest(false, width_
, height
, 1);
413 TEST_P(SSETest
, DISABLED_Speed
) {
414 for (int height
= 4; height
<= 128; height
+= 4) {
415 RunTest(true, width_
, height
, 100);
// SSE kernel instantiations. Each table pairs the C reference with one SIMD
// implementation; the high-bit-depth pair is added only when
// CONFIG_AV1_HIGHBITDEPTH is set. Every instantiation covers block widths
// 4, 8, ..., 128 (Range(4, 129, 4)).
#if HAVE_NEON
TestSSEFuncs sse_neon[] = {
  TestSSEFuncs(&aom_sse_c, &aom_sse_neon),
#if CONFIG_AV1_HIGHBITDEPTH
  TestSSEFuncs(&aom_highbd_sse_c, &aom_highbd_sse_neon)
#endif
};
INSTANTIATE_TEST_SUITE_P(NEON, SSETest,
                         Combine(ValuesIn(sse_neon), Range(4, 129, 4)));
#endif  // HAVE_NEON

#if HAVE_NEON_DOTPROD
TestSSEFuncs sse_neon_dotprod[] = {
  TestSSEFuncs(&aom_sse_c, &aom_sse_neon_dotprod),
};
INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, SSETest,
                         Combine(ValuesIn(sse_neon_dotprod), Range(4, 129, 4)));
#endif  // HAVE_NEON_DOTPROD

#if HAVE_SSE4_1
TestSSEFuncs sse_sse4[] = {
  TestSSEFuncs(&aom_sse_c, &aom_sse_sse4_1),
#if CONFIG_AV1_HIGHBITDEPTH
  TestSSEFuncs(&aom_highbd_sse_c, &aom_highbd_sse_sse4_1)
#endif
};
INSTANTIATE_TEST_SUITE_P(SSE4_1, SSETest,
                         Combine(ValuesIn(sse_sse4), Range(4, 129, 4)));
#endif  // HAVE_SSE4_1

#if HAVE_AVX2
TestSSEFuncs sse_avx2[] = {
  TestSSEFuncs(&aom_sse_c, &aom_sse_avx2),
#if CONFIG_AV1_HIGHBITDEPTH
  TestSSEFuncs(&aom_highbd_sse_c, &aom_highbd_sse_avx2)
#endif
};
INSTANTIATE_TEST_SUITE_P(AVX2, SSETest,
                         Combine(ValuesIn(sse_avx2), Range(4, 129, 4)));
#endif  // HAVE_AVX2

#if HAVE_SVE
#if CONFIG_AV1_HIGHBITDEPTH
TestSSEFuncs sse_sve[] = { TestSSEFuncs(&aom_highbd_sse_c,
                                        &aom_highbd_sse_sve) };
INSTANTIATE_TEST_SUITE_P(SVE, SSETest,
                         Combine(ValuesIn(sse_sve), Range(4, 129, 4)));
#endif  // CONFIG_AV1_HIGHBITDEPTH
#endif  // HAVE_SVE
470 //////////////////////////////////////////////////////////////////////////////
471 // get_blk sum squares test functions
472 //////////////////////////////////////////////////////////////////////////////
474 typedef void (*sse_sum_func
)(const int16_t *data
, int stride
, int bw
, int bh
,
475 int *x_sum
, int64_t *x2_sum
);
476 typedef libaom_test::FuncParam
<sse_sum_func
> TestSSE_SumFuncs
;
478 typedef std::tuple
<TestSSE_SumFuncs
, TX_SIZE
> SSE_SumTestParam
;
480 class SSE_Sum_Test
: public ::testing::TestWithParam
<SSE_SumTestParam
> {
482 ~SSE_Sum_Test() override
= default;
483 void SetUp() override
{
484 params_
= GET_PARAM(0);
485 rnd_
.Reset(ACMRandom::DeterministicSeed());
486 src_
= reinterpret_cast<int16_t *>(aom_memalign(32, 256 * 256 * 2));
487 ASSERT_NE(src_
, nullptr);
490 void TearDown() override
{ aom_free(src_
); }
491 void RunTest(bool is_random
, int tx_size
, int run_times
);
493 void GenRandomData(int width
, int height
, int stride
) {
494 const int msb
= 11; // Up to 12 bit input
495 const int limit
= 1 << (msb
+ 1);
496 for (int ii
= 0; ii
< height
; ii
++) {
497 for (int jj
= 0; jj
< width
; jj
++) {
498 src_
[ii
* stride
+ jj
] = rnd_(limit
);
503 void GenExtremeData(int width
, int height
, int stride
, int16_t *data
,
505 for (int ii
= 0; ii
< height
; ii
++) {
506 for (int jj
= 0; jj
< width
; jj
++) {
507 data
[ii
* stride
+ jj
] = val
;
513 TestSSE_SumFuncs params_
;
517 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(SSE_Sum_Test
);
519 void SSE_Sum_Test::RunTest(bool is_random
, int tx_size
, int run_times
) {
520 aom_usec_timer ref_timer
, test_timer
;
521 int width
= tx_size_wide
[tx_size
];
522 int height
= tx_size_high
[tx_size
];
523 for (int k
= 0; k
< 3; k
++) {
524 int stride
= 4 << rnd_(7); // Up to 256 stride
525 while (stride
< width
) { // Make sure it's valid
526 stride
= 4 << rnd_(7);
529 GenRandomData(width
, height
, stride
);
531 const int msb
= 12; // Up to 12 bit input
532 const int limit
= (1 << msb
) - 1;
534 GenExtremeData(width
, height
, stride
, src_
, limit
);
536 GenExtremeData(width
, height
, stride
, src_
, -limit
);
540 int64_t sse_intr
= 0;
544 params_
.ref_func(src_
, stride
, width
, height
, &sum_c
, &sse_c
);
545 params_
.tst_func(src_
, stride
, width
, height
, &sum_intr
, &sse_intr
);
548 aom_usec_timer_start(&ref_timer
);
549 for (int j
= 0; j
< run_times
; j
++) {
550 params_
.ref_func(src_
, stride
, width
, height
, &sum_c
, &sse_c
);
552 aom_usec_timer_mark(&ref_timer
);
553 const int elapsed_time_c
=
554 static_cast<int>(aom_usec_timer_elapsed(&ref_timer
));
556 aom_usec_timer_start(&test_timer
);
557 for (int j
= 0; j
< run_times
; j
++) {
558 params_
.tst_func(src_
, stride
, width
, height
, &sum_intr
, &sse_intr
);
560 aom_usec_timer_mark(&test_timer
);
561 const int elapsed_time_simd
=
562 static_cast<int>(aom_usec_timer_elapsed(&test_timer
));
565 "c_time=%d \t simd_time=%d \t "
566 "gain=%f\t width=%d\t height=%d \n",
567 elapsed_time_c
, elapsed_time_simd
,
568 (float)((float)elapsed_time_c
/ (float)elapsed_time_simd
), width
,
572 EXPECT_EQ(sum_c
, sum_intr
)
573 << "Error:" << k
<< " SSE Sum Test [" << width
<< "x" << height
574 << "] C output does not match optimized output.";
575 EXPECT_EQ(sse_c
, sse_intr
)
576 << "Error:" << k
<< " SSE Sum Test [" << width
<< "x" << height
577 << "] C output does not match optimized output.";
582 TEST_P(SSE_Sum_Test
, OperationCheck
) {
583 RunTest(true, GET_PARAM(1), 1); // GenRandomData
586 TEST_P(SSE_Sum_Test
, ExtremeValues
) { RunTest(false, GET_PARAM(1), 1); }
588 TEST_P(SSE_Sum_Test
, DISABLED_Speed
) { RunTest(true, GET_PARAM(1), 10000); }
// Transform sizes exercised by every SSE_Sum_Test instantiation. The guard
// lists every ISA that instantiates the suite (including SVE), so the table
// exists wherever it is referenced and is not an unused variable elsewhere.
#if HAVE_SSE2 || HAVE_AVX2 || HAVE_NEON || HAVE_SVE
const TX_SIZE kValidBlockSize[] = { TX_4X4,   TX_8X8,   TX_16X16, TX_32X32,
                                    TX_64X64, TX_4X8,   TX_8X4,   TX_8X16,
                                    TX_16X8,  TX_16X32, TX_32X16, TX_64X32,
                                    TX_32X64, TX_4X16,  TX_16X4,  TX_8X32,
                                    TX_32X8,  TX_16X64, TX_64X16 };
#endif  // HAVE_SSE2 || HAVE_AVX2 || HAVE_NEON || HAVE_SVE

#if HAVE_SSE2
TestSSE_SumFuncs sse_sum_sse2[] = { TestSSE_SumFuncs(
    &aom_get_blk_sse_sum_c, &aom_get_blk_sse_sum_sse2) };
INSTANTIATE_TEST_SUITE_P(SSE2, SSE_Sum_Test,
                         Combine(ValuesIn(sse_sum_sse2),
                                 ValuesIn(kValidBlockSize)));
#endif  // HAVE_SSE2

#if HAVE_AVX2
TestSSE_SumFuncs sse_sum_avx2[] = { TestSSE_SumFuncs(
    &aom_get_blk_sse_sum_c, &aom_get_blk_sse_sum_avx2) };
INSTANTIATE_TEST_SUITE_P(AVX2, SSE_Sum_Test,
                         Combine(ValuesIn(sse_sum_avx2),
                                 ValuesIn(kValidBlockSize)));
#endif  // HAVE_AVX2

#if HAVE_NEON
TestSSE_SumFuncs sse_sum_neon[] = { TestSSE_SumFuncs(
    &aom_get_blk_sse_sum_c, &aom_get_blk_sse_sum_neon) };
INSTANTIATE_TEST_SUITE_P(NEON, SSE_Sum_Test,
                         Combine(ValuesIn(sse_sum_neon),
                                 ValuesIn(kValidBlockSize)));
#endif  // HAVE_NEON

#if HAVE_SVE
TestSSE_SumFuncs sse_sum_sve[] = { TestSSE_SumFuncs(&aom_get_blk_sse_sum_c,
                                                    &aom_get_blk_sse_sum_sve) };
INSTANTIATE_TEST_SUITE_P(SVE, SSE_Sum_Test,
                         Combine(ValuesIn(sse_sum_sve),
                                 ValuesIn(kValidBlockSize)));
#endif  // HAVE_SVE
630 //////////////////////////////////////////////////////////////////////////////
631 // 2D Variance test functions
632 //////////////////////////////////////////////////////////////////////////////
634 typedef uint64_t (*Var2DFunc
)(uint8_t *src
, int stride
, int width
, int height
);
635 typedef libaom_test::FuncParam
<Var2DFunc
> TestFuncVar2D
;
637 const uint16_t test_block_size
[2] = { 128, 256 };
639 class Lowbd2dVarTest
: public ::testing::TestWithParam
<TestFuncVar2D
> {
641 ~Lowbd2dVarTest() override
= default;
642 void SetUp() override
{
643 params_
= this->GetParam();
644 rnd_
.Reset(ACMRandom::DeterministicSeed());
645 src_
= reinterpret_cast<uint8_t *>(
646 aom_memalign(16, 512 * 512 * sizeof(uint8_t)));
647 ASSERT_NE(src_
, nullptr);
650 void TearDown() override
{ aom_free(src_
); }
651 void RunTest(bool is_random
);
654 void GenRandomData(int width
, int height
, int stride
) {
655 const int msb
= 7; // Up to 8 bit input
656 const int limit
= 1 << (msb
+ 1);
657 for (int ii
= 0; ii
< height
; ii
++) {
658 for (int jj
= 0; jj
< width
; jj
++) {
659 src_
[ii
* stride
+ jj
] = rnd_(limit
);
664 void GenExtremeData(int width
, int height
, int stride
) {
665 const int msb
= 7; // Up to 8 bit input
666 const int limit
= 1 << (msb
+ 1);
667 const int val
= rnd_(2) ? limit
- 1 : 0;
668 for (int ii
= 0; ii
< height
; ii
++) {
669 for (int jj
= 0; jj
< width
; jj
++) {
670 src_
[ii
* stride
+ jj
] = val
;
676 TestFuncVar2D params_
;
680 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Lowbd2dVarTest
);
682 void Lowbd2dVarTest::RunTest(bool is_random
) {
684 for (int k
= 0; k
< kNumIterations
; k
++) {
685 const int width
= 4 * (rnd_(63) + 1); // Up to 256x256
686 const int height
= 4 * (rnd_(63) + 1); // Up to 256x256
687 int stride
= 4 << rnd_(8); // Up to 512 stride
688 while (stride
< width
) { // Make sure it's valid
689 stride
= 4 << rnd_(8);
692 GenRandomData(width
, height
, stride
);
694 GenExtremeData(width
, height
, stride
);
697 const uint64_t res_ref
= params_
.ref_func(src_
, stride
, width
, height
);
699 API_REGISTER_STATE_CHECK(res_tst
=
700 params_
.tst_func(src_
, stride
, width
, height
));
703 failed
= res_ref
!= res_tst
;
704 EXPECT_EQ(res_ref
, res_tst
)
705 << "Error: Sum Squares Test [" << width
<< "x" << height
706 << "] C output does not match optimized output.";
711 void Lowbd2dVarTest::RunSpeedTest() {
712 for (int block
= 0; block
< 2; block
++) {
713 const int width
= test_block_size
[block
];
714 const int height
= test_block_size
[block
];
715 int stride
= 4 << rnd_(8); // Up to 512 stride
716 while (stride
< width
) { // Make sure it's valid
717 stride
= 4 << rnd_(8);
719 GenExtremeData(width
, height
, stride
);
720 const int num_loops
= 1000000000 / (width
+ height
);
721 aom_usec_timer timer
;
722 aom_usec_timer_start(&timer
);
724 for (int i
= 0; i
< num_loops
; ++i
)
725 params_
.ref_func(src_
, stride
, width
, height
);
727 aom_usec_timer_mark(&timer
);
728 const int elapsed_time
= static_cast<int>(aom_usec_timer_elapsed(&timer
));
730 aom_usec_timer timer1
;
731 aom_usec_timer_start(&timer1
);
732 for (int i
= 0; i
< num_loops
; ++i
)
733 params_
.tst_func(src_
, stride
, width
, height
);
734 aom_usec_timer_mark(&timer1
);
735 const int elapsed_time1
= static_cast<int>(aom_usec_timer_elapsed(&timer1
));
736 printf("%3dx%-3d: Scaling = %.2f\n", width
, height
,
737 (double)elapsed_time
/ elapsed_time1
);
741 TEST_P(Lowbd2dVarTest
, OperationCheck
) {
742 RunTest(true); // GenRandomData
745 TEST_P(Lowbd2dVarTest
, ExtremeValues
) {
746 RunTest(false); // GenExtremeData
749 TEST_P(Lowbd2dVarTest
, DISABLED_Speed
) { RunSpeedTest(); }
// 8-bit 2D variance instantiations, each compiled only for targets that
// provide the corresponding SIMD implementation.
#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(SSE2, Lowbd2dVarTest,
                         ::testing::Values(TestFuncVar2D(&aom_var_2d_u8_c,
                                                         &aom_var_2d_u8_sse2)));
#endif  // HAVE_SSE2

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, Lowbd2dVarTest,
                         ::testing::Values(TestFuncVar2D(&aom_var_2d_u8_c,
                                                         &aom_var_2d_u8_avx2)));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, Lowbd2dVarTest,
                         ::testing::Values(TestFuncVar2D(&aom_var_2d_u8_c,
                                                         &aom_var_2d_u8_neon)));
#endif  // HAVE_NEON

#if HAVE_NEON_DOTPROD
INSTANTIATE_TEST_SUITE_P(NEON_DOTPROD, Lowbd2dVarTest,
                         ::testing::Values(TestFuncVar2D(
                             &aom_var_2d_u8_c, &aom_var_2d_u8_neon_dotprod)));
#endif  // HAVE_NEON_DOTPROD
783 class Highbd2dVarTest
: public ::testing::TestWithParam
<TestFuncVar2D
> {
785 ~Highbd2dVarTest() override
= default;
786 void SetUp() override
{
787 params_
= this->GetParam();
788 rnd_
.Reset(ACMRandom::DeterministicSeed());
789 src_
= reinterpret_cast<uint16_t *>(
790 aom_memalign(16, 512 * 512 * sizeof(uint16_t)));
791 ASSERT_NE(src_
, nullptr);
794 void TearDown() override
{ aom_free(src_
); }
795 void RunTest(bool is_random
);
798 void GenRandomData(int width
, int height
, int stride
) {
799 const int msb
= 11; // Up to 12 bit input
800 const int limit
= 1 << (msb
+ 1);
801 for (int ii
= 0; ii
< height
; ii
++) {
802 for (int jj
= 0; jj
< width
; jj
++) {
803 src_
[ii
* stride
+ jj
] = rnd_(limit
);
808 void GenExtremeData(int width
, int height
, int stride
) {
809 const int msb
= 11; // Up to 12 bit input
810 const int limit
= 1 << (msb
+ 1);
811 const int val
= rnd_(2) ? limit
- 1 : 0;
812 for (int ii
= 0; ii
< height
; ii
++) {
813 for (int jj
= 0; jj
< width
; jj
++) {
814 src_
[ii
* stride
+ jj
] = val
;
820 TestFuncVar2D params_
;
824 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(Highbd2dVarTest
);
826 void Highbd2dVarTest::RunTest(bool is_random
) {
828 for (int k
= 0; k
< kNumIterations
; k
++) {
829 const int width
= 4 * (rnd_(63) + 1); // Up to 256x256
830 const int height
= 4 * (rnd_(63) + 1); // Up to 256x256
831 int stride
= 4 << rnd_(8); // Up to 512 stride
832 while (stride
< width
) { // Make sure it's valid
833 stride
= 4 << rnd_(8);
836 GenRandomData(width
, height
, stride
);
838 GenExtremeData(width
, height
, stride
);
841 const uint64_t res_ref
=
842 params_
.ref_func(CONVERT_TO_BYTEPTR(src_
), stride
, width
, height
);
844 API_REGISTER_STATE_CHECK(
846 params_
.tst_func(CONVERT_TO_BYTEPTR(src_
), stride
, width
, height
));
849 failed
= res_ref
!= res_tst
;
850 EXPECT_EQ(res_ref
, res_tst
)
851 << "Error: Sum Squares Test [" << width
<< "x" << height
852 << "] C output does not match optimized output.";
857 void Highbd2dVarTest::RunSpeedTest() {
858 for (int block
= 0; block
< 2; block
++) {
859 const int width
= test_block_size
[block
];
860 const int height
= test_block_size
[block
];
861 int stride
= 4 << rnd_(8); // Up to 512 stride
862 while (stride
< width
) { // Make sure it's valid
863 stride
= 4 << rnd_(8);
865 GenExtremeData(width
, height
, stride
);
866 const int num_loops
= 1000000000 / (width
+ height
);
867 aom_usec_timer timer
;
868 aom_usec_timer_start(&timer
);
870 for (int i
= 0; i
< num_loops
; ++i
)
871 params_
.ref_func(CONVERT_TO_BYTEPTR(src_
), stride
, width
, height
);
873 aom_usec_timer_mark(&timer
);
874 const int elapsed_time
= static_cast<int>(aom_usec_timer_elapsed(&timer
));
876 aom_usec_timer timer1
;
877 aom_usec_timer_start(&timer1
);
878 for (int i
= 0; i
< num_loops
; ++i
)
879 params_
.tst_func(CONVERT_TO_BYTEPTR(src_
), stride
, width
, height
);
880 aom_usec_timer_mark(&timer1
);
881 const int elapsed_time1
= static_cast<int>(aom_usec_timer_elapsed(&timer1
));
882 printf("%3dx%-3d: Scaling = %.2f\n", width
, height
,
883 (double)elapsed_time
/ elapsed_time1
);
887 TEST_P(Highbd2dVarTest
, OperationCheck
) {
888 RunTest(true); // GenRandomData
891 TEST_P(Highbd2dVarTest
, ExtremeValues
) {
892 RunTest(false); // GenExtremeData
895 TEST_P(Highbd2dVarTest
, DISABLED_Speed
) { RunSpeedTest(); }
// 16-bit 2D variance instantiations, each compiled only for targets that
// provide the corresponding SIMD implementation.
#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(
    SSE2, Highbd2dVarTest,
    ::testing::Values(TestFuncVar2D(&aom_var_2d_u16_c, &aom_var_2d_u16_sse2)));
#endif  // HAVE_SSE2

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(
    AVX2, Highbd2dVarTest,
    ::testing::Values(TestFuncVar2D(&aom_var_2d_u16_c, &aom_var_2d_u16_avx2)));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(
    NEON, Highbd2dVarTest,
    ::testing::Values(TestFuncVar2D(&aom_var_2d_u16_c, &aom_var_2d_u16_neon)));
#endif  // HAVE_NEON

#if HAVE_SVE
INSTANTIATE_TEST_SUITE_P(SVE, Highbd2dVarTest,
                         ::testing::Values(TestFuncVar2D(&aom_var_2d_u16_c,
                                                         &aom_var_2d_u16_sve)));
#endif  // HAVE_SVE