2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
14 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
16 #include "config/aom_config.h"
17 #include "config/aom_dsp_rtcd.h"
19 #include "test/acm_random.h"
20 #include "test/register_state_check.h"
21 #include "test/util.h"
22 #include "av1/common/blockd.h"
23 #include "av1/common/common.h"
24 #include "av1/common/pred_common.h"
25 #include "aom_mem/aom_mem.h"
29 using libaom_test::ACMRandom
;
// Number of fill-and-predict iterations performed by RunTest() and
// RunSpeedTest() for each test parameter.
constexpr int count_test_block = 100000;
// Pointer to a high bit-depth intra predictor. The tests call it as
// fn(dst, stride, above, left, bit_depth) with 16-bit pixel buffers.
using HighbdIntraPred = void (*)(uint16_t *dst, ptrdiff_t stride,
                                 const uint16_t *above, const uint16_t *left,
                                 int bd);
// Pointer to a low bit-depth (8-bit pixel) intra predictor. The tests call it
// as fn(dst, stride, above, left).
using IntraPred = void (*)(uint8_t *dst, ptrdiff_t stride,
                           const uint8_t *above, const uint8_t *left);
41 // NOTE: Under gcc version 7.3.0 (Debian 7.3.0-5), if this template is in the
42 // anonymous namespace, then we get a strange compiler warning in
43 // the begin() and end() methods of the ParamGenerator template class in
44 // gtest/internal/gtest-param-util.h:
45 // warning: ‘<anonymous>’ is used uninitialized in this function
46 // As a workaround, put this template outside the anonymous namespace.
47 // See bug aomedia:2003.
// Test parameter bundle: the predictor under test, the reference predictor
// it is compared against, and the block dimensions / bit depth to run with.
// All constructor arguments are defaulted, so the type is also
// default-constructible.
template <typename FuncType>
struct IntraPredFunc {
  IntraPredFunc(FuncType pred = nullptr, FuncType ref = nullptr,
                int block_width_value = 0, int block_height_value = 0,
                int bit_depth_value = 0)
      : pred_fn(pred),
        ref_fn(ref),
        block_width(block_width_value),
        block_height(block_height_value),
        bit_depth(bit_depth_value) {}

  FuncType pred_fn;  // Implementation being verified.
  FuncType ref_fn;   // Reference its output is compared with.
  int block_width;
  int block_height;
  int bit_depth;
};
65 template <typename FuncType
, typename Pixel
>
66 class AV1IntraPredTest
67 : public ::testing::TestWithParam
<IntraPredFunc
<FuncType
> > {
69 void RunTest(Pixel
*left_col
, Pixel
*above_data
, Pixel
*dst
, Pixel
*ref_dst
) {
70 ACMRandom
rnd(ACMRandom::DeterministicSeed());
71 const int block_width
= params_
.block_width
;
72 const int block_height
= params_
.block_height
;
73 above_row_
= above_data
+ 16;
78 for (int i
= 0; i
< count_test_block
; ++i
) {
79 // Fill edges with random data, try first with saturated values.
80 for (int x
= -1; x
<= block_width
* 2; x
++) {
82 above_row_
[x
] = mask_
;
84 above_row_
[x
] = rnd
.Rand16() & mask_
;
87 for (int y
= 0; y
< block_height
; y
++) {
91 left_col_
[y
] = rnd
.Rand16() & mask_
;
95 CheckPrediction(i
, &error_count
);
97 ASSERT_EQ(0, error_count
);
99 void RunSpeedTest(Pixel
*left_col
, Pixel
*above_data
, Pixel
*dst
,
101 ACMRandom
rnd(ACMRandom::DeterministicSeed());
102 const int block_width
= params_
.block_width
;
103 const int block_height
= params_
.block_height
;
104 above_row_
= above_data
+ 16;
105 left_col_
= left_col
;
109 const int numIter
= 100;
112 int simd_sum_time
= 0;
113 for (int i
= 0; i
< count_test_block
; ++i
) {
114 // Fill edges with random data, try first with saturated values.
115 for (int x
= -1; x
<= block_width
* 2; x
++) {
117 above_row_
[x
] = mask_
;
119 above_row_
[x
] = rnd
.Rand16() & mask_
;
122 for (int y
= 0; y
< block_height
; y
++) {
124 left_col_
[y
] = mask_
;
126 left_col_
[y
] = rnd
.Rand16() & mask_
;
130 aom_usec_timer c_timer_
;
131 aom_usec_timer_start(&c_timer_
);
133 PredictRefSpeedTest(numIter
);
135 aom_usec_timer_mark(&c_timer_
);
137 aom_usec_timer simd_timer_
;
138 aom_usec_timer_start(&simd_timer_
);
140 PredictFncSpeedTest(numIter
);
142 aom_usec_timer_mark(&simd_timer_
);
144 c_sum_time
+= static_cast<int>(aom_usec_timer_elapsed(&c_timer_
));
145 simd_sum_time
+= static_cast<int>(aom_usec_timer_elapsed(&simd_timer_
));
147 CheckPrediction(i
, &error_count
);
151 "blockWxH = %d x %d c_time = %d \t simd_time = %d \t Gain = %4.2f \n",
152 block_width
, block_height
, c_sum_time
, simd_sum_time
,
153 (static_cast<float>(c_sum_time
) / static_cast<float>(simd_sum_time
)));
154 ASSERT_EQ(0, error_count
);
158 void SetUp() override
{
159 params_
= this->GetParam();
160 stride_
= params_
.block_width
* 3;
161 mask_
= (1 << params_
.bit_depth
) - 1;
164 virtual void Predict() = 0;
166 virtual void PredictRefSpeedTest(int num
) = 0;
167 virtual void PredictFncSpeedTest(int num
) = 0;
169 void CheckPrediction(int test_case_number
, int *error_count
) const {
170 // For each pixel ensure that the calculated value is the same as reference.
171 const int block_width
= params_
.block_width
;
172 const int block_height
= params_
.block_height
;
173 for (int y
= 0; y
< block_height
; y
++) {
174 for (int x
= 0; x
< block_width
; x
++) {
175 *error_count
+= ref_dst_
[x
+ y
* stride_
] != dst_
[x
+ y
* stride_
];
176 if (*error_count
== 1) {
177 ASSERT_EQ(ref_dst_
[x
+ y
* stride_
], dst_
[x
+ y
* stride_
])
178 << " Failed on Test Case Number " << test_case_number
179 << " location: x = " << x
<< " y = " << y
;
192 IntraPredFunc
<FuncType
> params_
;
195 #if CONFIG_AV1_HIGHBITDEPTH
196 class HighbdIntraPredTest
: public AV1IntraPredTest
<HighbdIntraPred
, uint16_t> {
198 void Predict() override
{
199 const int bit_depth
= params_
.bit_depth
;
200 params_
.ref_fn(ref_dst_
, stride_
, above_row_
, left_col_
, bit_depth
);
201 API_REGISTER_STATE_CHECK(
202 params_
.pred_fn(dst_
, stride_
, above_row_
, left_col_
, bit_depth
));
204 void PredictRefSpeedTest(int num
) override
{
205 const int bit_depth
= params_
.bit_depth
;
206 for (int i
= 0; i
< num
; i
++) {
207 params_
.ref_fn(ref_dst_
, stride_
, above_row_
, left_col_
, bit_depth
);
210 void PredictFncSpeedTest(int num
) override
{
211 const int bit_depth
= params_
.bit_depth
;
212 for (int i
= 0; i
< num
; i
++) {
213 params_
.pred_fn(dst_
, stride_
, above_row_
, left_col_
, bit_depth
);
217 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(HighbdIntraPredTest
);
221 class LowbdIntraPredTest
: public AV1IntraPredTest
<IntraPred
, uint8_t> {
223 void Predict() override
{
224 params_
.ref_fn(ref_dst_
, stride_
, above_row_
, left_col_
);
225 API_REGISTER_STATE_CHECK(
226 params_
.pred_fn(dst_
, stride_
, above_row_
, left_col_
));
228 void PredictRefSpeedTest(int num
) override
{
229 for (int i
= 0; i
< num
; i
++) {
230 params_
.ref_fn(ref_dst_
, stride_
, above_row_
, left_col_
);
233 void PredictFncSpeedTest(int num
) override
{
234 for (int i
= 0; i
< num
; i
++) {
235 params_
.pred_fn(dst_
, stride_
, above_row_
, left_col_
);
239 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(LowbdIntraPredTest
);
241 #if CONFIG_AV1_HIGHBITDEPTH
242 TEST_P(HighbdIntraPredTest
, Bitexact
) {
243 // max block size is 64
244 DECLARE_ALIGNED(16, uint16_t, left_col
[2 * 64]);
245 DECLARE_ALIGNED(16, uint16_t, above_data
[2 * 64 + 64]);
246 DECLARE_ALIGNED(16, uint16_t, dst
[3 * 64 * 64]);
247 DECLARE_ALIGNED(16, uint16_t, ref_dst
[3 * 64 * 64]);
249 av1_zero(above_data
);
250 RunTest(left_col
, above_data
, dst
, ref_dst
);
253 TEST_P(HighbdIntraPredTest
, DISABLED_Speed
) {
254 // max block size is 64
255 DECLARE_ALIGNED(16, uint16_t, left_col
[2 * 64]);
256 DECLARE_ALIGNED(16, uint16_t, above_data
[2 * 64 + 64]);
257 DECLARE_ALIGNED(16, uint16_t, dst
[3 * 64 * 64]);
258 DECLARE_ALIGNED(16, uint16_t, ref_dst
[3 * 64 * 64]);
260 av1_zero(above_data
);
261 RunSpeedTest(left_col
, above_data
, dst
, ref_dst
);
265 TEST_P(LowbdIntraPredTest
, Bitexact
) {
266 // max block size is 64
267 DECLARE_ALIGNED(16, uint8_t, left_col
[2 * 64]);
268 DECLARE_ALIGNED(16, uint8_t, above_data
[2 * 64 + 64]);
269 DECLARE_ALIGNED(16, uint8_t, dst
[3 * 64 * 64]);
270 DECLARE_ALIGNED(16, uint8_t, ref_dst
[3 * 64 * 64]);
272 av1_zero(above_data
);
273 RunTest(left_col
, above_data
, dst
, ref_dst
);
275 TEST_P(LowbdIntraPredTest
, DISABLED_Speed
) {
276 // max block size is 64
277 DECLARE_ALIGNED(16, uint8_t, left_col
[2 * 64]);
278 DECLARE_ALIGNED(16, uint8_t, above_data
[2 * 64 + 64]);
279 DECLARE_ALIGNED(16, uint8_t, dst
[3 * 64 * 64]);
280 DECLARE_ALIGNED(16, uint8_t, ref_dst
[3 * 64 * 64]);
282 av1_zero(above_data
);
283 RunSpeedTest(left_col
, above_data
, dst
, ref_dst
);
286 #if CONFIG_AV1_HIGHBITDEPTH
287 // -----------------------------------------------------------------------------
288 // High Bit Depth Tests
// Builds one high bit-depth test parameter: the `opt` variant of
// aom_highbd_<type>_predictor_<width>x<height>, its `_c` counterpart as
// reference, the block dimensions and the bit depth `bd`.
#define highbd_entry(type, width, height, opt, bd)                \
  IntraPredFunc<HighbdIntraPred>(                                 \
      &aom_highbd_##type##_predictor_##width##x##height##_##opt,  \
      &aom_highbd_##type##_predictor_##width##x##height##_c,      \
      width, height, bd)

// Expands to a highbd_entry() for each of the 19 block sizes listed below.
#define highbd_intrapred(type, opt, bd)    \
  highbd_entry(type, 4, 4, opt, bd),       \
  highbd_entry(type, 4, 8, opt, bd),       \
  highbd_entry(type, 4, 16, opt, bd),      \
  highbd_entry(type, 8, 4, opt, bd),       \
  highbd_entry(type, 8, 8, opt, bd),       \
  highbd_entry(type, 8, 16, opt, bd),      \
  highbd_entry(type, 8, 32, opt, bd),      \
  highbd_entry(type, 16, 4, opt, bd),      \
  highbd_entry(type, 16, 8, opt, bd),      \
  highbd_entry(type, 16, 16, opt, bd),     \
  highbd_entry(type, 16, 32, opt, bd),     \
  highbd_entry(type, 16, 64, opt, bd),     \
  highbd_entry(type, 32, 8, opt, bd),      \
  highbd_entry(type, 32, 16, opt, bd),     \
  highbd_entry(type, 32, 32, opt, bd),     \
  highbd_entry(type, 32, 64, opt, bd),     \
  highbd_entry(type, 64, 16, opt, bd),     \
  highbd_entry(type, 64, 32, opt, bd),     \
  highbd_entry(type, 64, 64, opt, bd)
308 #endif // CONFIG_AV1_HIGHBITDEPTH
310 // ---------------------------------------------------------------------------
311 // Low Bit Depth Tests
// Builds one low bit-depth test parameter: the `opt` variant of
// aom_<type>_predictor_<width>x<height>, its `_c` counterpart as reference,
// the block dimensions and a bit depth of 8.
#define lowbd_entry(type, width, height, opt)              \
  IntraPredFunc<IntraPred>(                                \
      &aom_##type##_predictor_##width##x##height##_##opt,  \
      &aom_##type##_predictor_##width##x##height##_c,      \
      width, height, 8)

// Expands to a lowbd_entry() for each of the 19 block sizes listed below.
#define lowbd_intrapred(type, opt)   \
  lowbd_entry(type, 4, 4, opt),      \
  lowbd_entry(type, 4, 8, opt),      \
  lowbd_entry(type, 4, 16, opt),     \
  lowbd_entry(type, 8, 4, opt),      \
  lowbd_entry(type, 8, 8, opt),      \
  lowbd_entry(type, 8, 16, opt),     \
  lowbd_entry(type, 8, 32, opt),     \
  lowbd_entry(type, 16, 4, opt),     \
  lowbd_entry(type, 16, 8, opt),     \
  lowbd_entry(type, 16, 16, opt),    \
  lowbd_entry(type, 16, 32, opt),    \
  lowbd_entry(type, 16, 64, opt),    \
  lowbd_entry(type, 32, 8, opt),     \
  lowbd_entry(type, 32, 16, opt),    \
  lowbd_entry(type, 32, 32, opt),    \
  lowbd_entry(type, 32, 64, opt),    \
  lowbd_entry(type, 64, 16, opt),    \
  lowbd_entry(type, 64, 32, opt),    \
  lowbd_entry(type, 64, 64, opt)
331 const IntraPredFunc
<IntraPred
> LowbdIntraPredTestVector
[] = {
332 lowbd_intrapred(dc
, sse2
), lowbd_intrapred(dc_top
, sse2
),
333 lowbd_intrapred(dc_left
, sse2
), lowbd_intrapred(dc_128
, sse2
),
334 lowbd_intrapred(v
, sse2
), lowbd_intrapred(h
, sse2
),
337 INSTANTIATE_TEST_SUITE_P(SSE2
, LowbdIntraPredTest
,
338 ::testing::ValuesIn(LowbdIntraPredTestVector
));
342 const IntraPredFunc
<IntraPred
> LowbdIntraPredTestVectorNeon
[] = {
343 lowbd_intrapred(dc
, neon
), lowbd_intrapred(dc_top
, neon
),
344 lowbd_intrapred(dc_left
, neon
), lowbd_intrapred(dc_128
, neon
),
345 lowbd_intrapred(v
, neon
), lowbd_intrapred(h
, neon
),
346 lowbd_intrapred(smooth
, neon
), lowbd_intrapred(smooth_v
, neon
),
347 lowbd_intrapred(smooth_h
, neon
), lowbd_intrapred(paeth
, neon
),
350 INSTANTIATE_TEST_SUITE_P(NEON
, LowbdIntraPredTest
,
351 ::testing::ValuesIn(LowbdIntraPredTestVectorNeon
));
355 const IntraPredFunc
<IntraPred
> LowbdIntraPredTestVectorSsse3
[] = {
356 lowbd_intrapred(paeth
, ssse3
),
357 lowbd_intrapred(smooth
, ssse3
),
358 lowbd_intrapred(smooth_v
, ssse3
),
359 lowbd_intrapred(smooth_h
, ssse3
),
362 INSTANTIATE_TEST_SUITE_P(SSSE3
, LowbdIntraPredTest
,
363 ::testing::ValuesIn(LowbdIntraPredTestVectorSsse3
));
367 const IntraPredFunc
<IntraPred
> LowbdIntraPredTestVectorAvx2
[] = {
368 lowbd_entry(dc
, 32, 16, avx2
), lowbd_entry(dc
, 32, 32, avx2
),
369 lowbd_entry(dc
, 32, 64, avx2
), lowbd_entry(dc
, 64, 16, avx2
),
370 lowbd_entry(dc
, 64, 32, avx2
), lowbd_entry(dc
, 64, 64, avx2
),
372 lowbd_entry(dc_top
, 32, 16, avx2
), lowbd_entry(dc_top
, 32, 32, avx2
),
373 lowbd_entry(dc_top
, 32, 64, avx2
), lowbd_entry(dc_top
, 64, 16, avx2
),
374 lowbd_entry(dc_top
, 64, 32, avx2
), lowbd_entry(dc_top
, 64, 64, avx2
),
376 lowbd_entry(dc_left
, 32, 16, avx2
), lowbd_entry(dc_left
, 32, 32, avx2
),
377 lowbd_entry(dc_left
, 32, 64, avx2
), lowbd_entry(dc_left
, 64, 16, avx2
),
378 lowbd_entry(dc_left
, 64, 32, avx2
), lowbd_entry(dc_left
, 64, 64, avx2
),
380 lowbd_entry(dc_128
, 32, 16, avx2
), lowbd_entry(dc_128
, 32, 32, avx2
),
381 lowbd_entry(dc_128
, 32, 64, avx2
), lowbd_entry(dc_128
, 64, 16, avx2
),
382 lowbd_entry(dc_128
, 64, 32, avx2
), lowbd_entry(dc_128
, 64, 64, avx2
),
384 lowbd_entry(v
, 32, 16, avx2
), lowbd_entry(v
, 32, 32, avx2
),
385 lowbd_entry(v
, 32, 64, avx2
), lowbd_entry(v
, 64, 16, avx2
),
386 lowbd_entry(v
, 64, 32, avx2
), lowbd_entry(v
, 64, 64, avx2
),
388 lowbd_entry(h
, 32, 32, avx2
),
390 lowbd_entry(paeth
, 16, 8, avx2
), lowbd_entry(paeth
, 16, 16, avx2
),
391 lowbd_entry(paeth
, 16, 32, avx2
), lowbd_entry(paeth
, 16, 64, avx2
),
392 lowbd_entry(paeth
, 32, 16, avx2
), lowbd_entry(paeth
, 32, 32, avx2
),
393 lowbd_entry(paeth
, 32, 64, avx2
), lowbd_entry(paeth
, 64, 16, avx2
),
394 lowbd_entry(paeth
, 64, 32, avx2
), lowbd_entry(paeth
, 64, 64, avx2
),
397 INSTANTIATE_TEST_SUITE_P(AVX2
, LowbdIntraPredTest
,
398 ::testing::ValuesIn(LowbdIntraPredTestVectorAvx2
));
401 #if CONFIG_AV1_HIGHBITDEPTH
403 const IntraPredFunc
<HighbdIntraPred
> HighbdIntraPredTestVectorNeon
[] = {
404 highbd_intrapred(dc
, neon
, 12), highbd_intrapred(dc_top
, neon
, 12),
405 highbd_intrapred(dc_left
, neon
, 12), highbd_intrapred(dc_128
, neon
, 12),
406 highbd_intrapred(v
, neon
, 12), highbd_intrapred(h
, neon
, 12),
407 highbd_intrapred(paeth
, neon
, 12), highbd_intrapred(smooth
, neon
, 12),
408 highbd_intrapred(smooth_v
, neon
, 12), highbd_intrapred(smooth_h
, neon
, 12),
411 INSTANTIATE_TEST_SUITE_P(NEON
, HighbdIntraPredTest
,
412 ::testing::ValuesIn(HighbdIntraPredTestVectorNeon
));
416 const IntraPredFunc
<HighbdIntraPred
> HighbdIntraPredTestVectorSse2
[] = {
417 highbd_entry(dc
, 4, 4, sse2
, 12),
418 highbd_entry(dc
, 4, 8, sse2
, 12),
419 highbd_entry(dc
, 8, 4, sse2
, 12),
420 highbd_entry(dc
, 8, 8, sse2
, 12),
421 highbd_entry(dc
, 8, 16, sse2
, 12),
422 highbd_entry(dc
, 16, 8, sse2
, 12),
423 highbd_entry(dc
, 16, 16, sse2
, 12),
424 highbd_entry(dc
, 16, 32, sse2
, 12),
425 highbd_entry(dc
, 32, 16, sse2
, 12),
426 highbd_entry(dc
, 32, 32, sse2
, 12),
428 highbd_entry(dc_top
, 4, 4, sse2
, 12),
429 highbd_entry(dc_top
, 4, 8, sse2
, 12),
430 highbd_entry(dc_top
, 8, 4, sse2
, 12),
431 highbd_entry(dc_top
, 8, 8, sse2
, 12),
432 highbd_entry(dc_top
, 8, 16, sse2
, 12),
433 highbd_entry(dc_top
, 16, 8, sse2
, 12),
434 highbd_entry(dc_top
, 16, 16, sse2
, 12),
435 highbd_entry(dc_top
, 16, 32, sse2
, 12),
436 highbd_entry(dc_top
, 32, 16, sse2
, 12),
437 highbd_entry(dc_top
, 32, 32, sse2
, 12),
439 highbd_entry(dc_left
, 4, 4, sse2
, 12),
440 highbd_entry(dc_left
, 4, 8, sse2
, 12),
441 highbd_entry(dc_left
, 8, 4, sse2
, 12),
442 highbd_entry(dc_left
, 8, 8, sse2
, 12),
443 highbd_entry(dc_left
, 8, 16, sse2
, 12),
444 highbd_entry(dc_left
, 16, 8, sse2
, 12),
445 highbd_entry(dc_left
, 16, 16, sse2
, 12),
446 highbd_entry(dc_left
, 16, 32, sse2
, 12),
447 highbd_entry(dc_left
, 32, 16, sse2
, 12),
448 highbd_entry(dc_left
, 32, 32, sse2
, 12),
450 highbd_entry(dc_128
, 4, 4, sse2
, 12),
451 highbd_entry(dc_128
, 4, 8, sse2
, 12),
452 highbd_entry(dc_128
, 8, 4, sse2
, 12),
453 highbd_entry(dc_128
, 8, 8, sse2
, 12),
454 highbd_entry(dc_128
, 8, 16, sse2
, 12),
455 highbd_entry(dc_128
, 16, 8, sse2
, 12),
456 highbd_entry(dc_128
, 16, 16, sse2
, 12),
457 highbd_entry(dc_128
, 16, 32, sse2
, 12),
458 highbd_entry(dc_128
, 32, 16, sse2
, 12),
459 highbd_entry(dc_128
, 32, 32, sse2
, 12),
461 highbd_entry(v
, 4, 4, sse2
, 12),
462 highbd_entry(v
, 4, 8, sse2
, 12),
463 highbd_entry(v
, 8, 4, sse2
, 12),
464 highbd_entry(v
, 8, 8, sse2
, 12),
465 highbd_entry(v
, 8, 16, sse2
, 12),
466 highbd_entry(v
, 16, 8, sse2
, 12),
467 highbd_entry(v
, 16, 16, sse2
, 12),
468 highbd_entry(v
, 16, 32, sse2
, 12),
469 highbd_entry(v
, 32, 16, sse2
, 12),
470 highbd_entry(v
, 32, 32, sse2
, 12),
472 highbd_entry(h
, 4, 4, sse2
, 12),
473 highbd_entry(h
, 4, 8, sse2
, 12),
474 highbd_entry(h
, 8, 4, sse2
, 12),
475 highbd_entry(h
, 8, 8, sse2
, 12),
476 highbd_entry(h
, 8, 16, sse2
, 12),
477 highbd_entry(h
, 16, 8, sse2
, 12),
478 highbd_entry(h
, 16, 16, sse2
, 12),
479 highbd_entry(h
, 16, 32, sse2
, 12),
480 highbd_entry(h
, 32, 16, sse2
, 12),
481 highbd_entry(h
, 32, 32, sse2
, 12),
484 INSTANTIATE_TEST_SUITE_P(SSE2
, HighbdIntraPredTest
,
485 ::testing::ValuesIn(HighbdIntraPredTestVectorSse2
));
487 #endif // CONFIG_AV1_HIGHBITDEPTH