/*
 * Copyright (c) 2017, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
11 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
13 #include "aom_ports/aom_timer.h"
14 #include "./av1_rtcd.h"
15 #include "test/util.h"
16 #include "test/acm_random.h"
18 using ::testing::make_tuple
;
20 using libaom_test::ACMRandom
;
// Loop bound used by the correctness tests in this file.
#define NUM_ITERATIONS (100)
// Loop bound used by the timed loops of the (disabled) speed tests.
#define NUM_ITERATIONS_SPEED (INT16_MAX)
// Expands to a comma-separated list of make_tuple(TX_SIZE, &function) entries,
// one per transform size listed below, suitable for brace-initializing a test
// parameter table. The order of the entries is preserved as-is.
#define ALL_CFL_TX_SIZES(function)                                 \
  make_tuple(TX_4X4, &function), make_tuple(TX_4X8, &function),    \
      make_tuple(TX_4X16, &function), make_tuple(TX_8X4, &function),   \
      make_tuple(TX_8X8, &function), make_tuple(TX_8X16, &function),   \
      make_tuple(TX_8X32, &function), make_tuple(TX_16X4, &function),  \
      make_tuple(TX_16X8, &function), make_tuple(TX_16X16, &function), \
      make_tuple(TX_16X32, &function), make_tuple(TX_32X8, &function), \
      make_tuple(TX_32X16, &function), make_tuple(TX_32X32, &function)
35 typedef cfl_subsample_lbd_fn (*get_subsample_fn
)(TX_SIZE tx_size
);
37 typedef cfl_predict_lbd_fn (*get_predict_fn
)(TX_SIZE tx_size
);
39 typedef cfl_predict_hbd_fn (*get_predict_fn_hbd
)(TX_SIZE tx_size
);
41 typedef cfl_subtract_average_fn (*sub_avg_fn
)(TX_SIZE tx_size
);
43 typedef ::testing::tuple
<TX_SIZE
, get_subsample_fn
> subsample_param
;
45 typedef ::testing::tuple
<TX_SIZE
, get_predict_fn
> predict_param
;
47 typedef ::testing::tuple
<TX_SIZE
, get_predict_fn_hbd
> predict_param_hbd
;
49 typedef ::testing::tuple
<TX_SIZE
, sub_avg_fn
> sub_avg_param
;
52 static void assert_eq(const A
*a
, const A
*b
, int width
, int height
) {
53 for (int j
= 0; j
< height
; j
++) {
54 for (int i
= 0; i
< width
; i
++) {
55 ASSERT_EQ(a
[j
* CFL_BUF_LINE
+ i
], b
[j
* CFL_BUF_LINE
+ i
]);
60 static void assertFaster(int ref_elapsed_time
, int elapsed_time
) {
61 EXPECT_GT(ref_elapsed_time
, elapsed_time
)
62 << "Error: CFLSubtractSpeedTest, SIMD slower than C." << std::endl
63 << "C time: " << ref_elapsed_time
<< " us" << std::endl
64 << "SIMD time: " << elapsed_time
<< " us" << std::endl
;
67 static void printSpeed(int ref_elapsed_time
, int elapsed_time
, int width
,
69 std::cout
.precision(2);
70 std::cout
<< "[ ] " << width
<< "x" << height
71 << ": C time = " << ref_elapsed_time
72 << " us, SIMD time = " << elapsed_time
<< " us"
73 << " (~" << ref_elapsed_time
/ (double)elapsed_time
<< "x) "
79 : public ::testing::TestWithParam
< ::testing::tuple
<TX_SIZE
, F
> > {
82 virtual void SetUp() {
83 tx_size
= ::testing::get
<0>(this->GetParam());
84 width
= tx_size_wide
[tx_size
];
85 height
= tx_size_high
[tx_size
];
86 fun_under_test
= ::testing::get
<1>(this->GetParam());
87 rnd(ACMRandom::DeterministicSeed());
98 template <typename F
, typename I
>
99 class CFLTestWithData
: public CFLTest
<F
> {
101 virtual ~CFLTestWithData() {}
104 I data
[CFL_BUF_SQUARE
];
105 I data_ref
[CFL_BUF_SQUARE
];
107 void init(I (ACMRandom::*random
)()) {
108 for (int j
= 0; j
< this->height
; j
++) {
109 for (int i
= 0; i
< this->width
; i
++) {
110 const I d
= (this->rnd
.*random
)();
111 data
[j
* CFL_BUF_LINE
+ i
] = d
;
112 data_ref
[j
* CFL_BUF_LINE
+ i
] = d
;
118 template <typename F
, typename I
>
119 class CFLTestWithAlignedData
: public CFLTest
<F
> {
121 virtual ~CFLTestWithAlignedData() {}
122 virtual void SetUp() {
125 reinterpret_cast<I
*>(aom_memalign(32, sizeof(I
) * CFL_BUF_SQUARE
));
127 reinterpret_cast<I
*>(aom_memalign(32, sizeof(I
) * CFL_BUF_SQUARE
));
128 sub_luma_pels_ref
= reinterpret_cast<int16_t *>(
129 aom_memalign(32, sizeof(int16_t) * CFL_BUF_SQUARE
));
130 sub_luma_pels
= reinterpret_cast<int16_t *>(
131 aom_memalign(32, sizeof(int16_t) * CFL_BUF_SQUARE
));
132 memset(chroma_pels_ref
, 0, sizeof(I
) * CFL_BUF_SQUARE
);
133 memset(chroma_pels
, 0, sizeof(I
) * CFL_BUF_SQUARE
);
134 memset(sub_luma_pels_ref
, 0, sizeof(int16_t) * CFL_BUF_SQUARE
);
135 memset(sub_luma_pels
, 0, sizeof(int16_t) * CFL_BUF_SQUARE
);
138 virtual void TearDown() {
139 aom_free(chroma_pels_ref
);
140 aom_free(sub_luma_pels_ref
);
141 aom_free(chroma_pels
);
142 aom_free(sub_luma_pels
);
148 int16_t *sub_luma_pels_ref
;
149 int16_t *sub_luma_pels
;
153 alpha_q3
= this->rnd(33) - 16;
154 dc
= this->rnd(1 << bd
);
155 for (int j
= 0; j
< this->height
; j
++) {
156 for (int i
= 0; i
< this->width
; i
++) {
157 chroma_pels
[j
* CFL_BUF_LINE
+ i
] = dc
;
158 chroma_pels_ref
[j
* CFL_BUF_LINE
+ i
] = dc
;
159 sub_luma_pels_ref
[j
* CFL_BUF_LINE
+ i
] =
160 sub_luma_pels
[j
* CFL_BUF_LINE
+ i
] = this->rnd
.Rand15Signed();
166 class CFLSubAvgTest
: public CFLTestWithData
<sub_avg_fn
, int16_t> {
168 virtual ~CFLSubAvgTest() {}
171 class CFLSubsampleTest
: public CFLTestWithData
<get_subsample_fn
, uint8_t> {
173 virtual ~CFLSubsampleTest() {}
176 class CFLPredictTest
: public CFLTestWithAlignedData
<get_predict_fn
, uint8_t> {
178 virtual ~CFLPredictTest() {}
181 class CFLPredictHBDTest
182 : public CFLTestWithAlignedData
<get_predict_fn_hbd
, uint16_t> {
184 virtual ~CFLPredictHBDTest() {}
187 TEST_P(CFLSubAvgTest
, SubAvgTest
) {
188 const cfl_subtract_average_fn ref_sub
= get_subtract_average_fn_c(tx_size
);
189 const cfl_subtract_average_fn sub
= fun_under_test(tx_size
);
190 for (int it
= 0; it
< NUM_ITERATIONS
; it
++) {
191 init(&ACMRandom::Rand15Signed
);
194 assert_eq
<int16_t>(data
, data_ref
, width
, height
);
198 TEST_P(CFLSubAvgTest
, DISABLED_SubAvgSpeedTest
) {
199 const cfl_subtract_average_fn ref_sub
= get_subtract_average_fn_c(tx_size
);
200 const cfl_subtract_average_fn sub
= fun_under_test(tx_size
);
202 aom_usec_timer ref_timer
;
203 aom_usec_timer timer
;
205 init(&ACMRandom::Rand15Signed
);
206 aom_usec_timer_start(&ref_timer
);
207 for (int k
= 0; k
< NUM_ITERATIONS_SPEED
; k
++) {
210 aom_usec_timer_mark(&ref_timer
);
211 int ref_elapsed_time
= (int)aom_usec_timer_elapsed(&ref_timer
);
213 aom_usec_timer_start(&timer
);
214 for (int k
= 0; k
< NUM_ITERATIONS_SPEED
; k
++) {
217 aom_usec_timer_mark(&timer
);
218 int elapsed_time
= (int)aom_usec_timer_elapsed(&timer
);
220 printSpeed(ref_elapsed_time
, elapsed_time
, width
, height
);
221 assertFaster(ref_elapsed_time
, elapsed_time
);
224 TEST_P(CFLSubsampleTest
, SubsampleTest
) {
225 int16_t sub_luma_pels
[CFL_BUF_SQUARE
];
226 int16_t sub_luma_pels_ref
[CFL_BUF_SQUARE
];
227 const int sub_width
= width
>> 1;
228 const int sub_height
= height
>> 1;
230 for (int it
= 0; it
< NUM_ITERATIONS
; it
++) {
231 init(&ACMRandom::Rand8
);
232 fun_under_test(tx_size
)(data
, CFL_BUF_LINE
, sub_luma_pels
);
233 cfl_get_luma_subsampling_420_lbd_c(tx_size
)(data_ref
, CFL_BUF_LINE
,
235 assert_eq
<int16_t>(sub_luma_pels
, sub_luma_pels_ref
, sub_width
, sub_height
);
239 TEST_P(CFLSubsampleTest
, DISABLED_SubsampleSpeedTest
) {
240 int16_t sub_luma_pels
[CFL_BUF_SQUARE
];
241 int16_t sub_luma_pels_ref
[CFL_BUF_SQUARE
];
242 cfl_subsample_lbd_fn subsample
= fun_under_test(tx_size
);
243 cfl_subsample_lbd_fn subsample_ref
=
244 cfl_get_luma_subsampling_420_lbd_c(tx_size
);
245 aom_usec_timer ref_timer
;
246 aom_usec_timer timer
;
248 init(&ACMRandom::Rand8
);
249 aom_usec_timer_start(&ref_timer
);
250 for (int k
= 0; k
< NUM_ITERATIONS_SPEED
; k
++) {
251 subsample_ref(data_ref
, CFL_BUF_LINE
, sub_luma_pels
);
253 aom_usec_timer_mark(&ref_timer
);
254 int ref_elapsed_time
= (int)aom_usec_timer_elapsed(&ref_timer
);
256 aom_usec_timer_start(&timer
);
257 for (int k
= 0; k
< NUM_ITERATIONS_SPEED
; k
++) {
258 subsample(data
, CFL_BUF_LINE
, sub_luma_pels_ref
);
260 aom_usec_timer_mark(&timer
);
261 int elapsed_time
= (int)aom_usec_timer_elapsed(&timer
);
263 printSpeed(ref_elapsed_time
, elapsed_time
, width
, height
);
264 assertFaster(ref_elapsed_time
, elapsed_time
);
267 TEST_P(CFLPredictTest
, PredictTest
) {
268 for (int it
= 0; it
< NUM_ITERATIONS
; it
++) {
270 fun_under_test(tx_size
)(sub_luma_pels
, chroma_pels
, CFL_BUF_LINE
, alpha_q3
);
271 get_predict_lbd_fn_c(tx_size
)(sub_luma_pels_ref
, chroma_pels_ref
,
272 CFL_BUF_LINE
, alpha_q3
);
274 assert_eq
<uint8_t>(chroma_pels
, chroma_pels_ref
, width
, height
);
278 TEST_P(CFLPredictTest
, DISABLED_PredictSpeedTest
) {
279 aom_usec_timer ref_timer
;
280 aom_usec_timer timer
;
283 cfl_predict_lbd_fn predict_impl
= get_predict_lbd_fn_c(tx_size
);
284 aom_usec_timer_start(&ref_timer
);
286 for (int k
= 0; k
< NUM_ITERATIONS_SPEED
; k
++) {
287 predict_impl(sub_luma_pels_ref
, chroma_pels_ref
, CFL_BUF_LINE
, alpha_q3
);
289 aom_usec_timer_mark(&ref_timer
);
290 int ref_elapsed_time
= (int)aom_usec_timer_elapsed(&ref_timer
);
292 predict_impl
= fun_under_test(tx_size
);
293 aom_usec_timer_start(&timer
);
294 for (int k
= 0; k
< NUM_ITERATIONS_SPEED
; k
++) {
295 predict_impl(sub_luma_pels
, chroma_pels
, CFL_BUF_LINE
, alpha_q3
);
297 aom_usec_timer_mark(&timer
);
298 int elapsed_time
= (int)aom_usec_timer_elapsed(&timer
);
300 printSpeed(ref_elapsed_time
, elapsed_time
, width
, height
);
301 assertFaster(ref_elapsed_time
, elapsed_time
);
304 TEST_P(CFLPredictHBDTest
, PredictHBDTest
) {
306 for (int it
= 0; it
< NUM_ITERATIONS
; it
++) {
308 fun_under_test(tx_size
)(sub_luma_pels
, chroma_pels
, CFL_BUF_LINE
, alpha_q3
,
310 get_predict_hbd_fn_c(tx_size
)(sub_luma_pels_ref
, chroma_pels_ref
,
311 CFL_BUF_LINE
, alpha_q3
, bd
);
313 assert_eq
<uint16_t>(chroma_pels
, chroma_pels_ref
, width
, height
);
317 TEST_P(CFLPredictHBDTest
, DISABLED_PredictHBDSpeedTest
) {
318 aom_usec_timer ref_timer
;
319 aom_usec_timer timer
;
322 cfl_predict_hbd_fn predict_impl
= get_predict_hbd_fn_c(tx_size
);
323 aom_usec_timer_start(&ref_timer
);
325 for (int k
= 0; k
< NUM_ITERATIONS_SPEED
; k
++) {
326 predict_impl(sub_luma_pels_ref
, chroma_pels_ref
, CFL_BUF_LINE
, alpha_q3
,
329 aom_usec_timer_mark(&ref_timer
);
330 int ref_elapsed_time
= (int)aom_usec_timer_elapsed(&ref_timer
);
332 predict_impl
= fun_under_test(tx_size
);
333 aom_usec_timer_start(&timer
);
334 for (int k
= 0; k
< NUM_ITERATIONS_SPEED
; k
++) {
335 predict_impl(sub_luma_pels
, chroma_pels
, CFL_BUF_LINE
, alpha_q3
, bd
);
337 aom_usec_timer_mark(&timer
);
338 int elapsed_time
= (int)aom_usec_timer_elapsed(&timer
);
340 printSpeed(ref_elapsed_time
, elapsed_time
, width
, height
);
341 assertFaster(ref_elapsed_time
, elapsed_time
);
345 const sub_avg_param sub_avg_sizes_sse2
[] = { ALL_CFL_TX_SIZES(
346 get_subtract_average_fn_sse2
) };
348 INSTANTIATE_TEST_CASE_P(SSE2
, CFLSubAvgTest
,
349 ::testing::ValuesIn(sub_avg_sizes_sse2
));
355 const subsample_param subsample_sizes_ssse3
[] = { ALL_CFL_TX_SIZES(
356 cfl_get_luma_subsampling_420_lbd_ssse3
) };
358 const predict_param predict_sizes_ssse3
[] = { ALL_CFL_TX_SIZES(
359 get_predict_lbd_fn_ssse3
) };
361 const predict_param_hbd predict_sizes_hbd_ssse3
[] = { ALL_CFL_TX_SIZES(
362 get_predict_hbd_fn_ssse3
) };
364 INSTANTIATE_TEST_CASE_P(SSSE3
, CFLSubsampleTest
,
365 ::testing::ValuesIn(subsample_sizes_ssse3
));
367 INSTANTIATE_TEST_CASE_P(SSSE3
, CFLPredictTest
,
368 ::testing::ValuesIn(predict_sizes_ssse3
));
370 INSTANTIATE_TEST_CASE_P(SSSE3
, CFLPredictHBDTest
,
371 ::testing::ValuesIn(predict_sizes_hbd_ssse3
));
375 const sub_avg_param sub_avg_sizes_avx2
[] = { ALL_CFL_TX_SIZES(
376 get_subtract_average_fn_avx2
) };
378 const subsample_param subsample_sizes_avx2
[] = { ALL_CFL_TX_SIZES(
379 cfl_get_luma_subsampling_420_lbd_avx2
) };
381 const predict_param predict_sizes_avx2
[] = { ALL_CFL_TX_SIZES(
382 get_predict_lbd_fn_avx2
) };
384 const predict_param_hbd predict_sizes_hbd_avx2
[] = { ALL_CFL_TX_SIZES(
385 get_predict_hbd_fn_avx2
) };
387 INSTANTIATE_TEST_CASE_P(AVX2
, CFLSubAvgTest
,
388 ::testing::ValuesIn(sub_avg_sizes_avx2
));
390 INSTANTIATE_TEST_CASE_P(AVX2
, CFLSubsampleTest
,
391 ::testing::ValuesIn(subsample_sizes_avx2
));
393 INSTANTIATE_TEST_CASE_P(AVX2
, CFLPredictTest
,
394 ::testing::ValuesIn(predict_sizes_avx2
));
396 INSTANTIATE_TEST_CASE_P(AVX2
, CFLPredictHBDTest
,
397 ::testing::ValuesIn(predict_sizes_hbd_avx2
));
401 const sub_avg_param sub_avg_sizes_neon
[] = { ALL_CFL_TX_SIZES(
402 get_subtract_average_fn_neon
) };
404 const subsample_param subsample_sizes_neon
[] = { ALL_CFL_TX_SIZES(
405 cfl_get_luma_subsampling_420_lbd_neon
) };
407 INSTANTIATE_TEST_CASE_P(NEON
, CFLSubAvgTest
,
408 ::testing::ValuesIn(sub_avg_sizes_neon
));
410 INSTANTIATE_TEST_CASE_P(NEON
, CFLSubsampleTest
,
411 ::testing::ValuesIn(subsample_sizes_neon
));