Improved aom_smooth_predictor_16x 32,16,8
[aom.git] / test / cfl_test.cc
blob5dfd28eade18dc998c81e2c48d9115ee99ada6d0
/*
 * Copyright (c) 2017, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
11 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
13 #include "aom_ports/aom_timer.h"
14 #include "./av1_rtcd.h"
15 #include "test/util.h"
16 #include "test/acm_random.h"
18 using ::testing::make_tuple;
20 using libaom_test::ACMRandom;
// Number of randomized rounds run by each correctness test.
#define NUM_ITERATIONS (100)
// Number of back-to-back calls timed by each (disabled) speed test.
#define NUM_ITERATIONS_SPEED (INT16_MAX)

// Expands to a comma-separated list of (TX_SIZE, &function) tuples, one per
// transform size listed below. Intended as the initializer of a test
// parameter array.
#define ALL_CFL_TX_SIZES(function) \
  make_tuple(TX_4X4, &function),   \
  make_tuple(TX_4X8, &function),   \
  make_tuple(TX_4X16, &function),  \
  make_tuple(TX_8X4, &function),   \
  make_tuple(TX_8X8, &function),   \
  make_tuple(TX_8X16, &function),  \
  make_tuple(TX_8X32, &function),  \
  make_tuple(TX_16X4, &function),  \
  make_tuple(TX_16X8, &function),  \
  make_tuple(TX_16X16, &function), \
  make_tuple(TX_16X32, &function), \
  make_tuple(TX_32X8, &function),  \
  make_tuple(TX_32X16, &function), \
  make_tuple(TX_32X32, &function)
34 namespace {
35 typedef cfl_subsample_lbd_fn (*get_subsample_fn)(TX_SIZE tx_size);
37 typedef cfl_predict_lbd_fn (*get_predict_fn)(TX_SIZE tx_size);
39 typedef cfl_predict_hbd_fn (*get_predict_fn_hbd)(TX_SIZE tx_size);
41 typedef cfl_subtract_average_fn (*sub_avg_fn)(TX_SIZE tx_size);
43 typedef ::testing::tuple<TX_SIZE, get_subsample_fn> subsample_param;
45 typedef ::testing::tuple<TX_SIZE, get_predict_fn> predict_param;
47 typedef ::testing::tuple<TX_SIZE, get_predict_fn_hbd> predict_param_hbd;
49 typedef ::testing::tuple<TX_SIZE, sub_avg_fn> sub_avg_param;
51 template <typename A>
52 static void assert_eq(const A *a, const A *b, int width, int height) {
53 for (int j = 0; j < height; j++) {
54 for (int i = 0; i < width; i++) {
55 ASSERT_EQ(a[j * CFL_BUF_LINE + i], b[j * CFL_BUF_LINE + i]);
60 static void assertFaster(int ref_elapsed_time, int elapsed_time) {
61 EXPECT_GT(ref_elapsed_time, elapsed_time)
62 << "Error: CFLSubtractSpeedTest, SIMD slower than C." << std::endl
63 << "C time: " << ref_elapsed_time << " us" << std::endl
64 << "SIMD time: " << elapsed_time << " us" << std::endl;
// Prints one line summarizing a speed comparison for a width x height block:
// both elapsed times (microseconds) and the approximate speed-up ratio
// ref_elapsed_time / elapsed_time, shown with two significant digits.
//
// The precision of std::cout is restored before returning so that this helper
// does not change the formatting of later output.
static void printSpeed(int ref_elapsed_time, int elapsed_time, int width,
                       int height) {
  // precision() returns the previous setting, which is put back below.
  const std::streamsize saved_precision = std::cout.precision(2);
  const double speedup = ref_elapsed_time / static_cast<double>(elapsed_time);
  std::cout << "[ ] " << width << "x" << height
            << ": C time = " << ref_elapsed_time
            << " us, SIMD time = " << elapsed_time << " us"
            << " (~" << speedup << "x) " << std::endl;
  std::cout.precision(saved_precision);
}
77 template <typename F>
78 class CFLTest
79 : public ::testing::TestWithParam< ::testing::tuple<TX_SIZE, F> > {
80 public:
81 virtual ~CFLTest() {}
82 virtual void SetUp() {
83 tx_size = ::testing::get<0>(this->GetParam());
84 width = tx_size_wide[tx_size];
85 height = tx_size_high[tx_size];
86 fun_under_test = ::testing::get<1>(this->GetParam());
87 rnd(ACMRandom::DeterministicSeed());
90 protected:
91 ACMRandom rnd;
92 F fun_under_test;
93 TX_SIZE tx_size;
94 int width;
95 int height;
98 template <typename F, typename I>
99 class CFLTestWithData : public CFLTest<F> {
100 public:
101 virtual ~CFLTestWithData() {}
103 protected:
104 I data[CFL_BUF_SQUARE];
105 I data_ref[CFL_BUF_SQUARE];
107 void init(I (ACMRandom::*random)()) {
108 for (int j = 0; j < this->height; j++) {
109 for (int i = 0; i < this->width; i++) {
110 const I d = (this->rnd.*random)();
111 data[j * CFL_BUF_LINE + i] = d;
112 data_ref[j * CFL_BUF_LINE + i] = d;
118 template <typename F, typename I>
119 class CFLTestWithAlignedData : public CFLTest<F> {
120 public:
121 virtual ~CFLTestWithAlignedData() {}
122 virtual void SetUp() {
123 CFLTest<F>::SetUp();
124 chroma_pels_ref =
125 reinterpret_cast<I *>(aom_memalign(32, sizeof(I) * CFL_BUF_SQUARE));
126 chroma_pels =
127 reinterpret_cast<I *>(aom_memalign(32, sizeof(I) * CFL_BUF_SQUARE));
128 sub_luma_pels_ref = reinterpret_cast<int16_t *>(
129 aom_memalign(32, sizeof(int16_t) * CFL_BUF_SQUARE));
130 sub_luma_pels = reinterpret_cast<int16_t *>(
131 aom_memalign(32, sizeof(int16_t) * CFL_BUF_SQUARE));
132 memset(chroma_pels_ref, 0, sizeof(I) * CFL_BUF_SQUARE);
133 memset(chroma_pels, 0, sizeof(I) * CFL_BUF_SQUARE);
134 memset(sub_luma_pels_ref, 0, sizeof(int16_t) * CFL_BUF_SQUARE);
135 memset(sub_luma_pels, 0, sizeof(int16_t) * CFL_BUF_SQUARE);
138 virtual void TearDown() {
139 aom_free(chroma_pels_ref);
140 aom_free(sub_luma_pels_ref);
141 aom_free(chroma_pels);
142 aom_free(sub_luma_pels);
145 protected:
146 I *chroma_pels_ref;
147 I *chroma_pels;
148 int16_t *sub_luma_pels_ref;
149 int16_t *sub_luma_pels;
150 int alpha_q3;
151 I dc;
152 void init(int bd) {
153 alpha_q3 = this->rnd(33) - 16;
154 dc = this->rnd(1 << bd);
155 for (int j = 0; j < this->height; j++) {
156 for (int i = 0; i < this->width; i++) {
157 chroma_pels[j * CFL_BUF_LINE + i] = dc;
158 chroma_pels_ref[j * CFL_BUF_LINE + i] = dc;
159 sub_luma_pels_ref[j * CFL_BUF_LINE + i] =
160 sub_luma_pels[j * CFL_BUF_LINE + i] = this->rnd.Rand15Signed();
166 class CFLSubAvgTest : public CFLTestWithData<sub_avg_fn, int16_t> {
167 public:
168 virtual ~CFLSubAvgTest() {}
171 class CFLSubsampleTest : public CFLTestWithData<get_subsample_fn, uint8_t> {
172 public:
173 virtual ~CFLSubsampleTest() {}
176 class CFLPredictTest : public CFLTestWithAlignedData<get_predict_fn, uint8_t> {
177 public:
178 virtual ~CFLPredictTest() {}
181 class CFLPredictHBDTest
182 : public CFLTestWithAlignedData<get_predict_fn_hbd, uint16_t> {
183 public:
184 virtual ~CFLPredictHBDTest() {}
187 TEST_P(CFLSubAvgTest, SubAvgTest) {
188 const cfl_subtract_average_fn ref_sub = get_subtract_average_fn_c(tx_size);
189 const cfl_subtract_average_fn sub = fun_under_test(tx_size);
190 for (int it = 0; it < NUM_ITERATIONS; it++) {
191 init(&ACMRandom::Rand15Signed);
192 sub(data);
193 ref_sub(data_ref);
194 assert_eq<int16_t>(data, data_ref, width, height);
198 TEST_P(CFLSubAvgTest, DISABLED_SubAvgSpeedTest) {
199 const cfl_subtract_average_fn ref_sub = get_subtract_average_fn_c(tx_size);
200 const cfl_subtract_average_fn sub = fun_under_test(tx_size);
202 aom_usec_timer ref_timer;
203 aom_usec_timer timer;
205 init(&ACMRandom::Rand15Signed);
206 aom_usec_timer_start(&ref_timer);
207 for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
208 ref_sub(data_ref);
210 aom_usec_timer_mark(&ref_timer);
211 int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
213 aom_usec_timer_start(&timer);
214 for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
215 sub(data);
217 aom_usec_timer_mark(&timer);
218 int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
220 printSpeed(ref_elapsed_time, elapsed_time, width, height);
221 assertFaster(ref_elapsed_time, elapsed_time);
224 TEST_P(CFLSubsampleTest, SubsampleTest) {
225 int16_t sub_luma_pels[CFL_BUF_SQUARE];
226 int16_t sub_luma_pels_ref[CFL_BUF_SQUARE];
227 const int sub_width = width >> 1;
228 const int sub_height = height >> 1;
230 for (int it = 0; it < NUM_ITERATIONS; it++) {
231 init(&ACMRandom::Rand8);
232 fun_under_test(tx_size)(data, CFL_BUF_LINE, sub_luma_pels);
233 cfl_get_luma_subsampling_420_lbd_c(tx_size)(data_ref, CFL_BUF_LINE,
234 sub_luma_pels_ref);
235 assert_eq<int16_t>(sub_luma_pels, sub_luma_pels_ref, sub_width, sub_height);
239 TEST_P(CFLSubsampleTest, DISABLED_SubsampleSpeedTest) {
240 int16_t sub_luma_pels[CFL_BUF_SQUARE];
241 int16_t sub_luma_pels_ref[CFL_BUF_SQUARE];
242 cfl_subsample_lbd_fn subsample = fun_under_test(tx_size);
243 cfl_subsample_lbd_fn subsample_ref =
244 cfl_get_luma_subsampling_420_lbd_c(tx_size);
245 aom_usec_timer ref_timer;
246 aom_usec_timer timer;
248 init(&ACMRandom::Rand8);
249 aom_usec_timer_start(&ref_timer);
250 for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
251 subsample_ref(data_ref, CFL_BUF_LINE, sub_luma_pels);
253 aom_usec_timer_mark(&ref_timer);
254 int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
256 aom_usec_timer_start(&timer);
257 for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
258 subsample(data, CFL_BUF_LINE, sub_luma_pels_ref);
260 aom_usec_timer_mark(&timer);
261 int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
263 printSpeed(ref_elapsed_time, elapsed_time, width, height);
264 assertFaster(ref_elapsed_time, elapsed_time);
267 TEST_P(CFLPredictTest, PredictTest) {
268 for (int it = 0; it < NUM_ITERATIONS; it++) {
269 init(8);
270 fun_under_test(tx_size)(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3);
271 get_predict_lbd_fn_c(tx_size)(sub_luma_pels_ref, chroma_pels_ref,
272 CFL_BUF_LINE, alpha_q3);
274 assert_eq<uint8_t>(chroma_pels, chroma_pels_ref, width, height);
278 TEST_P(CFLPredictTest, DISABLED_PredictSpeedTest) {
279 aom_usec_timer ref_timer;
280 aom_usec_timer timer;
282 init(8);
283 cfl_predict_lbd_fn predict_impl = get_predict_lbd_fn_c(tx_size);
284 aom_usec_timer_start(&ref_timer);
286 for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
287 predict_impl(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3);
289 aom_usec_timer_mark(&ref_timer);
290 int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
292 predict_impl = fun_under_test(tx_size);
293 aom_usec_timer_start(&timer);
294 for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
295 predict_impl(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3);
297 aom_usec_timer_mark(&timer);
298 int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
300 printSpeed(ref_elapsed_time, elapsed_time, width, height);
301 assertFaster(ref_elapsed_time, elapsed_time);
304 TEST_P(CFLPredictHBDTest, PredictHBDTest) {
305 int bd = 12;
306 for (int it = 0; it < NUM_ITERATIONS; it++) {
307 init(bd);
308 fun_under_test(tx_size)(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3,
309 bd);
310 get_predict_hbd_fn_c(tx_size)(sub_luma_pels_ref, chroma_pels_ref,
311 CFL_BUF_LINE, alpha_q3, bd);
313 assert_eq<uint16_t>(chroma_pels, chroma_pels_ref, width, height);
317 TEST_P(CFLPredictHBDTest, DISABLED_PredictHBDSpeedTest) {
318 aom_usec_timer ref_timer;
319 aom_usec_timer timer;
320 int bd = 12;
321 init(bd);
322 cfl_predict_hbd_fn predict_impl = get_predict_hbd_fn_c(tx_size);
323 aom_usec_timer_start(&ref_timer);
325 for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
326 predict_impl(sub_luma_pels_ref, chroma_pels_ref, CFL_BUF_LINE, alpha_q3,
327 bd);
329 aom_usec_timer_mark(&ref_timer);
330 int ref_elapsed_time = (int)aom_usec_timer_elapsed(&ref_timer);
332 predict_impl = fun_under_test(tx_size);
333 aom_usec_timer_start(&timer);
334 for (int k = 0; k < NUM_ITERATIONS_SPEED; k++) {
335 predict_impl(sub_luma_pels, chroma_pels, CFL_BUF_LINE, alpha_q3, bd);
337 aom_usec_timer_mark(&timer);
338 int elapsed_time = (int)aom_usec_timer_elapsed(&timer);
340 printSpeed(ref_elapsed_time, elapsed_time, width, height);
341 assertFaster(ref_elapsed_time, elapsed_time);
#if HAVE_SSE2
// SSE2: only the subtract-average tests are instantiated.
const sub_avg_param sub_avg_sizes_sse2[] = {
  ALL_CFL_TX_SIZES(get_subtract_average_fn_sse2)
};

INSTANTIATE_TEST_CASE_P(SSE2, CFLSubAvgTest,
                        ::testing::ValuesIn(sub_avg_sizes_sse2));

#endif  // HAVE_SSE2
#if HAVE_SSSE3
// SSSE3: subsampling plus low and high bit-depth prediction tests.

const subsample_param subsample_sizes_ssse3[] = {
  ALL_CFL_TX_SIZES(cfl_get_luma_subsampling_420_lbd_ssse3)
};

const predict_param predict_sizes_ssse3[] = {
  ALL_CFL_TX_SIZES(get_predict_lbd_fn_ssse3)
};

const predict_param_hbd predict_sizes_hbd_ssse3[] = {
  ALL_CFL_TX_SIZES(get_predict_hbd_fn_ssse3)
};

INSTANTIATE_TEST_CASE_P(SSSE3, CFLSubsampleTest,
                        ::testing::ValuesIn(subsample_sizes_ssse3));

INSTANTIATE_TEST_CASE_P(SSSE3, CFLPredictTest,
                        ::testing::ValuesIn(predict_sizes_ssse3));

INSTANTIATE_TEST_CASE_P(SSSE3, CFLPredictHBDTest,
                        ::testing::ValuesIn(predict_sizes_hbd_ssse3));
#endif  // HAVE_SSSE3
#if HAVE_AVX2
// AVX2: all four test fixtures are instantiated.
const sub_avg_param sub_avg_sizes_avx2[] = {
  ALL_CFL_TX_SIZES(get_subtract_average_fn_avx2)
};

const subsample_param subsample_sizes_avx2[] = {
  ALL_CFL_TX_SIZES(cfl_get_luma_subsampling_420_lbd_avx2)
};

const predict_param predict_sizes_avx2[] = {
  ALL_CFL_TX_SIZES(get_predict_lbd_fn_avx2)
};

const predict_param_hbd predict_sizes_hbd_avx2[] = {
  ALL_CFL_TX_SIZES(get_predict_hbd_fn_avx2)
};

INSTANTIATE_TEST_CASE_P(AVX2, CFLSubAvgTest,
                        ::testing::ValuesIn(sub_avg_sizes_avx2));

INSTANTIATE_TEST_CASE_P(AVX2, CFLSubsampleTest,
                        ::testing::ValuesIn(subsample_sizes_avx2));

INSTANTIATE_TEST_CASE_P(AVX2, CFLPredictTest,
                        ::testing::ValuesIn(predict_sizes_avx2));

INSTANTIATE_TEST_CASE_P(AVX2, CFLPredictHBDTest,
                        ::testing::ValuesIn(predict_sizes_hbd_avx2));
#endif  // HAVE_AVX2
#if HAVE_NEON
// NEON: subtract-average and subsampling tests are instantiated.
const sub_avg_param sub_avg_sizes_neon[] = {
  ALL_CFL_TX_SIZES(get_subtract_average_fn_neon)
};

const subsample_param subsample_sizes_neon[] = {
  ALL_CFL_TX_SIZES(cfl_get_luma_subsampling_420_lbd_neon)
};

INSTANTIATE_TEST_CASE_P(NEON, CFLSubAvgTest,
                        ::testing::ValuesIn(sub_avg_sizes_neon));

INSTANTIATE_TEST_CASE_P(NEON, CFLSubsampleTest,
                        ::testing::ValuesIn(subsample_sizes_neon));

#endif  // HAVE_NEON
414 } // namespace