2 * Copyright (c) 2017, Alliance for Open Media. All rights reserved
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
14 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
16 #include "./av1_rtcd.h"
17 #include "aom_ports/aom_timer.h"
18 #include "test/acm_random.h"
19 #include "test/clear_system_state.h"
20 #include "test/register_state_check.h"
21 #include "test/util.h"
24 #include "av1/common/common_data.h"
// Number of iterations of the randomized correctness loop.
28 const int kTestIters
= 10;
// Number of RunOne() calls timed per implementation in the speed test.
29 const int kPerfIters
= 1000;
// Passed as the x_step_qn argument of the convolve functions.
33 const int kXStepQn
= 16;
// Passed as the y_step_qn argument of the convolve functions.
34 const int kYStepQn
= 20;
36 using std::tr1::tuple
;
37 using std::tr1::make_tuple
;
38 using libaom_test::ACMRandom
;
// Selectable filter lengths. The enumerators are consecutive and start at
// zero, which NTapsToInt() relies on.
enum NTaps { EIGHT_TAP, TEN_TAP, TWELVE_TAP };

// Converts an NTaps enumerator into a tap count: a base of eight taps plus
// two more for every step up the enumeration.
int NTapsToInt(NTaps ntaps) {
  const int steps = static_cast<int>(ntaps);
  return 8 + 2 * steps;
}
43 // A 16-bit filter with a configurable number of taps.
// Rebuilds the coefficient table for the given tap count. `backwards`
// selects a decreasing rather than an increasing ramp for the first filter.
46 void set(NTaps ntaps
, bool backwards
);
// Filter descriptor whose address is passed to the convolve functions; set()
// points its filter_ptr at coeffs_.
48 InterpFilterParams params_
;
// Backing storage for the coefficients referenced by params_.
51 std::vector
<int16_t> coeffs_
;
54 void TestFilter::set(NTaps ntaps
, bool backwards
) {
55 const int n
= NTapsToInt(ntaps
);
56 assert(n
>= 8 && n
<= 12);
58 // The filter has n * SUBPEL_SHIFTS proper elements and an extra 8 bogus
59 // elements at the end so that convolutions can read off the end safely.
60 coeffs_
.resize(n
* SUBPEL_SHIFTS
+ 8);
62 // The coefficients are pretty much arbitrary, but convolutions shouldn't
63 // over or underflow. For the first filter (subpels = 0), we use an
64 // increasing or decreasing ramp (depending on the backwards parameter). We
65 // don't want any zero coefficients, so we make it have an x-intercept at -1
66 // or n. To ensure absence of under/overflow, we normalise the area under the
67 // ramp to be I = 1 << FILTER_BITS (so that convolving a constant function
68 // gives the identity).
70 // When increasing, the function has the form:
74 // Summing and rearranging for A gives A = 2 * I / (n * (n + 1)). If the
75 // filter is reversed, we have the same A but with formula
78 const int I
= 1 << FILTER_BITS
;
79 const float A
= 2.f
* I
/ (n
* (n
+ 1.f
));
80 for (int i
= 0; i
< n
; ++i
) {
81 coeffs_
[i
] = static_cast<int16_t>(A
* (backwards
? (n
- i
) : (i
+ 1)));
84 // For the other filters, make them slightly different by swapping two
85 // columns. Filter k will have the columns (k % n) and (7 * k) % n swapped.
86 const size_t filter_size
= sizeof(coeffs_
[0] * n
);
87 int16_t *const filter0
= &coeffs_
[0];
88 for (int k
= 1; k
< SUBPEL_SHIFTS
; ++k
) {
89 int16_t *filterk
= &coeffs_
[k
* n
];
90 memcpy(filterk
, filter0
, filter_size
);
92 const int idx0
= k
% n
;
93 const int idx1
= (7 * k
) % n
;
95 const int16_t tmp
= filterk
[idx0
];
96 filterk
[idx0
] = filterk
[idx1
];
100 // Finally, write some rubbish at the end to make sure we don't use it.
101 for (int i
= 0; i
< 8; ++i
) coeffs_
[n
* SUBPEL_SHIFTS
+ i
] = 123 + i
;
104 params_
.filter_ptr
= &coeffs_
[0];
106 // These are ignored by the functions being tested. Set them to whatever.
107 params_
.subpel_shifts
= SUBPEL_SHIFTS
;
108 params_
.interp_filter
= EIGHTTAP_REGULAR
;
// Owns the padded source and destination buffers of one test case. Each
// vector holds two equally sized blocks back to back: the first is handed out
// when `ref` is true and the second otherwise.
111 template <typename SrcPixel
>
// Records the block size and bit depth, derives both strides and allocates
// room for two source blocks and two destination blocks.
114 TestImage(int w
, int h
, int bd
) : w_(w
), h_(h
), bd_(bd
) {
// The bit depth has to fit in the source pixel type.
116 assert(bd
<= 8 * static_cast<int>(sizeof(SrcPixel
)));
118 // Pad width by 2*kHPad and then round up to the next multiple of 16
119 // to get src_stride_. Add another 16 for dst_stride_ (to make sure
120 // something goes wrong if we use the wrong one)
121 src_stride_
= (w_
+ 2 * kHPad
+ 15) & ~15;
122 dst_stride_
= src_stride_
+ 16;
124 // Allocate image data
125 src_data_
.resize(2 * src_block_size());
126 dst_data_
.resize(2 * dst_block_size());
// Fills both buffers using values drawn from rnd (defined out of line).
129 void Initialize(ACMRandom
*rnd
);
// Row pitch, in elements, of the source and destination buffers.
132 int src_stride() const { return src_stride_
; }
133 int dst_stride() const { return dst_stride_
; }
// Number of elements in one padded block: (h_ + 2 * kVPad) rows of one
// stride each.
135 int src_block_size() const { return (h_
+ 2 * kVPad
) * src_stride(); }
136 int dst_block_size() const { return (h_
+ 2 * kVPad
) * dst_stride(); }
// Returns the source block selected by `ref` (first block when true, second
// otherwise). With `borders` false the pointer is advanced past kVPad rows
// and kHPad columns of padding; with `borders` true it is the block start.
138 const SrcPixel
*GetSrcData(bool ref
, bool borders
) const {
139 const SrcPixel
*block
= &src_data_
[ref
? 0 : src_block_size()];
140 return borders
? block
: block
+ kHPad
+ src_stride_
* kVPad
;
// Destination counterpart of GetSrcData(), using dst_stride_ and
// dst_block_size().
143 int32_t *GetDstData(bool ref
, bool borders
) {
144 int32_t *block
= &dst_data_
[ref
? 0 : dst_block_size()];
145 return borders
? block
: block
+ kHPad
+ dst_stride_
* kVPad
;
// Strides computed by the constructor.
150 int src_stride_
, dst_stride_
;
// Two source blocks followed by two destination blocks (see constructor).
152 std::vector
<SrcPixel
> src_data_
;
153 std::vector
<int32_t> dst_data_
;
// Fills num_pixels elements starting at data. The statements visible here
// zero the range and write random values masked to bd bits.
// NOTE(review): the statement that consults `trash` is not visible in this
// excerpt; presumably it chooses between the zero fill and the random fill --
// confirm against the full file.
156 template <typename Pixel
>
157 void FillEdge(ACMRandom
*rnd
, int num_pixels
, int bd
, bool trash
, Pixel
*data
) {
// Zero fill of the whole range.
159 memset(data
, 0, sizeof(*data
) * num_pixels
);
// Keep only the low bd bits of each random value.
162 const Pixel mask
= (1 << bd
) - 1;
163 for (int i
= 0; i
< num_pixels
; ++i
) data
[i
] = rnd
->Rand16() & mask
;
// Fills one padded block at data and then duplicates it into the block that
// immediately follows it. The interior w x h pixels always get random values
// masked to bd bits; the border regions are filled through FillEdge() with
// trash_edges forwarded as its `trash` argument.
166 template <typename Pixel
>
167 void PrepBuffers(ACMRandom
*rnd
, int w
, int h
, int stride
, int bd
,
168 bool trash_edges
, Pixel
*data
) {
170 const Pixel mask
= (1 << bd
) - 1;
172 // Fill in the first buffer with random data
// First kVPad rows of the block (whole rows, stride elements each).
174 FillEdge(rnd
, stride
* kVPad
, bd
, trash_edges
, data
);
175 for (int r
= 0; r
< h
; ++r
) {
176 Pixel
*row_data
= data
+ (kVPad
+ r
) * stride
;
177 // Left border, contents, right border
178 FillEdge(rnd
, kHPad
, bd
, trash_edges
, row_data
);
179 for (int c
= 0; c
< w
; ++c
) row_data
[kHPad
+ c
] = rnd
->Rand16() & mask
;
180 FillEdge(rnd
, kHPad
, bd
, trash_edges
, row_data
+ kHPad
+ w
);
// Last kVPad rows of the block.
183 FillEdge(rnd
, stride
* kVPad
, bd
, trash_edges
, data
+ stride
* (kVPad
+ h
));
// Size of one block in elements (block_elts) and in bytes (block_size).
185 const int bpp
= sizeof(*data
);
186 const int block_elts
= stride
* (h
+ 2 * kVPad
);
187 const int block_size
= bpp
* block_elts
;
189 // Now copy that to the second buffer
190 memcpy(data
+ block_elts
, data
, block_size
);
193 template <typename SrcPixel
>
194 void TestImage
<SrcPixel
>::Initialize(ACMRandom
*rnd
) {
195 PrepBuffers(rnd
, w_
, h_
, src_stride_
, bd_
, false, &src_data_
[0]);
196 PrepBuffers(rnd
, w_
, h_
, dst_stride_
, bd_
, true, &dst_data_
[0]);
199 template <typename SrcPixel
>
200 void TestImage
<SrcPixel
>::Check() const {
201 // If memcmp returns 0, there's nothing to do.
202 const int num_pixels
= dst_block_size();
203 const int32_t *ref_dst
= &dst_data_
[0];
204 const int32_t *tst_dst
= &dst_data_
[num_pixels
];
206 if (0 == memcmp(ref_dst
, tst_dst
, sizeof(*ref_dst
) * num_pixels
)) return;
208 // Otherwise, iterate through the buffer looking for differences (including
210 const int stride
= dst_stride_
;
211 for (int r
= 0; r
< h_
+ 2 * kVPad
; ++r
) {
212 for (int c
= 0; c
< w_
+ 2 * kHPad
; ++c
) {
213 const int32_t ref_value
= ref_dst
[r
* stride
+ c
];
214 const int32_t tst_value
= tst_dst
[r
* stride
+ c
];
216 EXPECT_EQ(tst_value
, ref_value
)
217 << "Error at row: " << (r
- kVPad
) << ", col: " << (c
- kHPad
);
// Block dimensions; element 0 is read as the width and element 1 as the
// height by SetParams().
222 typedef tuple
<int, int> BlockDimension
;
// Bundles the parameters shared by the low and high bit depth tests: block
// dimensions, horizontal and vertical tap counts, and the averaging flag.
225 BaseParams(BlockDimension dims
, NTaps ntaps_x
, NTaps ntaps_y
, bool avg
)
226 : dims(dims
), ntaps_x(ntaps_x
), ntaps_y(ntaps_y
), avg(avg
) {}
// Tap count selectors for the horizontal and vertical filters.
229 NTaps ntaps_x
, ntaps_y
;
// Test fixture shared by the low and high bit depth tests, parameterised on
// the source pixel type.
233 template <typename SrcPixel
>
234 class ConvolveScaleTestBase
: public ::testing::Test
{
// image_ starts out null and is deleted by the destructor.
236 ConvolveScaleTestBase() : image_(NULL
) {}
237 virtual ~ConvolveScaleTestBase() { delete image_
; }
// Calls libaom_test::ClearSystemState() after every test.
238 virtual void TearDown() { libaom_test::ClearSystemState(); }
240 // Implemented by subclasses (SetUp depends on the parameters passed
241 // in and RunOne depends on the function to be tested. These can't
242 // be templated for low/high bit depths because they have different
243 // numbers of parameters)
244 virtual void SetUp() = 0;
245 virtual void RunOne(bool ref
) = 0;
// Copies the test parameters into the fixture, rebuilds both filters and
// allocates a TestImage for the requested block size and bit depth.
// NOTE(review): the parameter spelled "¶ms" below looks like a mis-encoded
// "&params" (the body reads it as `params`) -- confirm against the original
// file.
// NOTE(review): avg_ and bd_ are read here but their assignments are not
// visible in this excerpt, and the result of get_conv_params_no_round()
// appears unused as shown; presumably those lines were lost -- confirm.
248 void SetParams(const BaseParams
¶ms
, int bd
) {
// Element 0 of dims is the width and element 1 the height.
249 width_
= std::tr1::get
<0>(params
.dims
);
250 height_
= std::tr1::get
<1>(params
.dims
);
251 ntaps_x_
= params
.ntaps_x
;
252 ntaps_y_
= params
.ntaps_y
;
// The horizontal filter uses the increasing ramp and the vertical filter the
// decreasing one.
256 filter_x_
.set(ntaps_x_
, false);
257 filter_y_
.set(ntaps_y_
, true);
259 get_conv_params_no_round(0, avg_
!= false, 0, NULL
, 0, 1);
// Raw owning pointer; released in the destructor.
262 image_
= new TestImage
<SrcPixel
>(width_
, height_
, bd_
);
// Configures the joint compound averaging fields of convolve_params_.
// (i, j) == (-1, -1) sets use_jnt_comp_avg to 0; the remaining statements set
// it to 1 and load the forward/backward offsets from
// quant_dist_lookup_table[i][j].
// NOTE(review): the line separating the two cases is not visible in this
// excerpt; presumably it is an `else` -- confirm.
266 void SetConvParamOffset(int i
, int j
) {
267 if (i
== -1 && j
== -1) {
268 convolve_params_
.use_jnt_comp_avg
= 0;
270 convolve_params_
.use_jnt_comp_avg
= 1;
// Entry [0] is stored as the forward offset and entry [1] as the backward
// offset.
271 convolve_params_
.fwd_offset
= quant_dist_lookup_table
[i
][j
][0];
272 convolve_params_
.bck_offset
= quant_dist_lookup_table
[i
][j
][1];
275 #endif // CONFIG_JNT_COMP
// Fragment of the correctness loop: a deterministically seeded generator and
// kTestIters iterations.
// NOTE(review): the enclosing function header and the statements that
// prepare, run and check each iteration are not visible in this excerpt.
278 ACMRandom
rnd(ACMRandom::DeterministicSeed());
279 for (int i
= 0; i
< kTestIters
; ++i
) {
// (-1, -1) selects the configuration with use_jnt_comp_avg == 0.
281 SetConvParamOffset(-1, -1);
// Sweep table indices j in [0, 2) and k in [0, 4).
287 for (int j
= 0; j
< 2; ++j
) {
288 for (int k
= 0; k
< 4; ++k
) {
289 SetConvParamOffset(j
, k
);
301 #endif // CONFIG_JNT_COMP
306 ACMRandom
rnd(ACMRandom::DeterministicSeed());
309 aom_usec_timer ref_timer
;
310 aom_usec_timer_start(&ref_timer
);
311 for (int i
= 0; i
< kPerfIters
; ++i
) RunOne(true);
312 aom_usec_timer_mark(&ref_timer
);
313 const int64_t ref_time
= aom_usec_timer_elapsed(&ref_timer
);
315 aom_usec_timer tst_timer
;
316 aom_usec_timer_start(&tst_timer
);
317 for (int i
= 0; i
< kPerfIters
; ++i
) RunOne(false);
318 aom_usec_timer_mark(&tst_timer
);
319 const int64_t tst_time
= aom_usec_timer_elapsed(&tst_timer
);
321 std::cout
<< "[ ] C time = " << ref_time
/ 1000
322 << " ms, SIMD time = " << tst_time
/ 1000 << " ms\n";
324 EXPECT_GT(ref_time
, tst_time
)
325 << "Error: CDEFSpeedTest, SIMD slower than C.\n"
326 << "C time: " << ref_time
<< " us\n"
327 << "SIMD time: " << tst_time
<< " us\n";
// Picks a random subpel offset. The low three bits of a random byte select
// the case: 0 and 1 are special cases, and every other value yields
// 1 + PseudoUniform(SCALE_SUBPEL_SHIFTS - 2).
// NOTE(review): the body of the first branch is not visible in this excerpt;
// the comment in Prep() suggests it returns 0 -- confirm.
330 static int RandomSubpel(ACMRandom
*rnd
) {
331 const uint8_t subpel_mode
= rnd
->Rand8();
332 if ((subpel_mode
& 7) == 0) {
334 } else if ((subpel_mode
& 7) == 1) {
// Largest offset, SCALE_SUBPEL_SHIFTS - 1.
335 return SCALE_SUBPEL_SHIFTS
- 1;
337 return 1 + rnd
->PseudoUniform(SCALE_SUBPEL_SHIFTS
- 2);
// Draws fresh subpel offsets and refills the image buffers, all from rnd.
// The x offset is drawn before the y offset, and both before the image data.
341 void Prep(ACMRandom
*rnd
) {
344 // Choose subpel_x_ and subpel_y_. They should be less than
345 // SCALE_SUBPEL_SHIFTS; we also want to add extra weight to "interesting"
346 // values: 0 and SCALE_SUBPEL_SHIFTS - 1
347 subpel_x_
= RandomSubpel(rnd
);
348 subpel_y_
= RandomSubpel(rnd
);
350 image_
->Initialize(rnd
);
// Block size and bit depth of the current test case.
353 int width_
, height_
, bd_
;
// Tap count selectors for the horizontal and vertical filters.
354 NTaps ntaps_x_
, ntaps_y_
;
// Subpel offsets chosen by Prep().
356 int subpel_x_
, subpel_y_
;
// Filters rebuilt by SetParams().
357 TestFilter filter_x_
, filter_y_
;
// Owned image buffers; allocated in SetParams() and deleted in the
// destructor.
358 TestImage
<SrcPixel
> *image_
;
// Parameters whose address is passed to the convolve functions.
359 ConvolveParams convolve_params_
;
// Block dimensions as a (width, height) pair.
362 typedef tuple
<int, int> BlockDimension
;
// Signature shared by the low bit depth reference function and the function
// under test: 8-bit source, 32-bit destination.
364 typedef void (*LowbdConvolveFunc
)(const uint8_t *src
, int src_stride
,
365 int32_t *dst
, int dst_stride
, int w
, int h
,
366 InterpFilterParams
*filter_params_x
,
367 InterpFilterParams
*filter_params_y
,
368 const int subpel_x_qn
, const int x_step_qn
,
369 const int subpel_y_qn
, const int y_step_qn
,
370 ConvolveParams
*conv_params
);
372 // Test parameter list:
373 // <tst_fun, dims, ntaps_x, ntaps_y, avg>
// NOTE(review): the alias name that completes this typedef is not visible in
// this excerpt.
374 typedef tuple
<LowbdConvolveFunc
, BlockDimension
, NTaps
, NTaps
, bool>
// Low bit depth fixture: 8-bit source pixels, parameterised by LowBDParams.
377 class LowBDConvolveScaleTest
378 : public ConvolveScaleTestBase
<uint8_t>,
379 public ::testing::WithParamInterface
<LowBDParams
> {
381 virtual ~LowBDConvolveScaleTest() {}
// Unpack the test parameters in tuple order: function under test, block
// dimensions, horizontal taps, vertical taps, averaging flag.
// NOTE(review): the enclosing function header and the definition of `bd`
// used below are not visible in this excerpt.
384 tst_fun_
= GET_PARAM(0);
386 const BlockDimension
&block
= GET_PARAM(1);
387 const NTaps ntaps_x
= GET_PARAM(2);
388 const NTaps ntaps_y
= GET_PARAM(3);
390 const bool avg
= GET_PARAM(4);
392 SetParams(BaseParams(block
, ntaps_x
, ntaps_y
, avg
), bd
);
// Runs one convolution on the buffers selected by `ref`, skipping the padding
// (borders == false).
// NOTE(review): both the C reference call and the tst_fun_ call appear below;
// the branch on `ref` that presumably selects between them is not visible in
// this excerpt.
395 void RunOne(bool ref
) {
396 const uint8_t *src
= image_
->GetSrcData(ref
, false);
397 CONV_BUF_TYPE
*dst
= image_
->GetDstData(ref
, false);
398 const int src_stride
= image_
->src_stride();
399 const int dst_stride
= image_
->dst_stride();
// C reference implementation.
402 av1_convolve_2d_scale_c(src
, src_stride
, dst
, dst_stride
, width_
, height_
,
403 &filter_x_
.params_
, &filter_y_
.params_
, subpel_x_
,
404 kXStepQn
, subpel_y_
, kYStepQn
, &convolve_params_
);
// Function under test, called with the same arguments.
406 tst_fun_(src
, src_stride
, dst
, dst_stride
, width_
, height_
,
407 &filter_x_
.params_
, &filter_y_
.params_
, subpel_x_
, kXStepQn
,
408 subpel_y_
, kYStepQn
, &convolve_params_
);
// Function under test, taken from test parameter 0.
413 LowbdConvolveFunc tst_fun_
;
// Block sizes exercised by the tests, as (width, height) pairs from 2x2 up to
// 128x128.
416 const BlockDimension kBlockDim
[] = {
417 make_tuple(2, 2), make_tuple(2, 4), make_tuple(4, 4),
418 make_tuple(4, 8), make_tuple(8, 4), make_tuple(8, 8),
419 make_tuple(8, 16), make_tuple(16, 8), make_tuple(16, 16),
420 make_tuple(16, 32), make_tuple(32, 16), make_tuple(32, 32),
421 make_tuple(32, 64), make_tuple(64, 32), make_tuple(64, 64),
422 make_tuple(64, 128), make_tuple(128, 64), make_tuple(128, 128),
// Tap counts exercised by the tests; used for both filter directions.
425 const NTaps kNTaps
[] = { EIGHT_TAP
, TEN_TAP
, TWELVE_TAP
};
// Correctness test.
427 TEST_P(LowBDConvolveScaleTest
, Check
) { Run(); }
// Speed comparison; the DISABLED_ prefix keeps it out of default runs.
428 TEST_P(LowBDConvolveScaleTest
, DISABLED_Speed
) { SpeedTest(); }
// Instantiates the low bit depth tests for the SSE4.1 implementation over
// every block size and every combination of horizontal/vertical tap counts.
// NOTE(review): the remaining arguments and the closing of this macro call
// are not visible in this excerpt.
430 INSTANTIATE_TEST_CASE_P(
431 SSE4_1
, LowBDConvolveScaleTest
,
432 ::testing::Combine(::testing::Values(av1_convolve_2d_scale_sse4_1
),
433 ::testing::ValuesIn(kBlockDim
),
434 ::testing::ValuesIn(kNTaps
), ::testing::ValuesIn(kNTaps
),
// Signature shared by the high bit depth reference function and the function
// under test: 16-bit source, 32-bit destination and a trailing bit depth.
437 typedef void (*HighbdConvolveFunc
)(const uint16_t *src
, int src_stride
,
438 int32_t *dst
, int dst_stride
, int w
, int h
,
439 InterpFilterParams
*filter_params_x
,
440 InterpFilterParams
*filter_params_y
,
441 const int subpel_x_qn
, const int x_step_qn
,
442 const int subpel_y_qn
, const int y_step_qn
,
443 ConvolveParams
*conv_params
, int bd
);
445 // Test parameter list:
446 // <tst_fun, dims, ntaps_x, ntaps_y, avg, bd>
// NOTE(review): the alias name that completes this typedef is not visible in
// this excerpt.
447 typedef tuple
<HighbdConvolveFunc
, BlockDimension
, NTaps
, NTaps
, bool, int>
// High bit depth fixture: 16-bit source pixels, parameterised by HighBDParams.
450 class HighBDConvolveScaleTest
451 : public ConvolveScaleTestBase
<uint16_t>,
452 public ::testing::WithParamInterface
<HighBDParams
> {
454 virtual ~HighBDConvolveScaleTest() {}
// Unpack the test parameters in tuple order: function under test, block
// dimensions, horizontal taps, vertical taps, averaging flag, bit depth.
// NOTE(review): the enclosing function header is not visible in this excerpt.
457 tst_fun_
= GET_PARAM(0);
459 const BlockDimension
&block
= GET_PARAM(1);
460 const NTaps ntaps_x
= GET_PARAM(2);
461 const NTaps ntaps_y
= GET_PARAM(3);
462 const bool avg
= GET_PARAM(4);
463 const int bd
= GET_PARAM(5);
465 SetParams(BaseParams(block
, ntaps_x
, ntaps_y
, avg
), bd
);
// Runs one convolution on the buffers selected by `ref`, skipping the padding
// (borders == false).
// NOTE(review): both the C reference call and the tst_fun_ call appear below;
// the branch on `ref` that presumably selects between them is not visible in
// this excerpt.
468 void RunOne(bool ref
) {
469 const uint16_t *src
= image_
->GetSrcData(ref
, false);
470 CONV_BUF_TYPE
*dst
= image_
->GetDstData(ref
, false);
471 const int src_stride
= image_
->src_stride();
472 const int dst_stride
= image_
->dst_stride();
// C reference implementation.
475 av1_highbd_convolve_2d_scale_c(
476 src
, src_stride
, dst
, dst_stride
, width_
, height_
, &filter_x_
.params_
,
477 &filter_y_
.params_
, subpel_x_
, kXStepQn
, subpel_y_
, kYStepQn
,
478 &convolve_params_
, bd_
);
// Function under test, called with the same arguments.
480 tst_fun_(src
, src_stride
, dst
, dst_stride
, width_
, height_
,
481 &filter_x_
.params_
, &filter_y_
.params_
, subpel_x_
, kXStepQn
,
482 subpel_y_
, kYStepQn
, &convolve_params_
, bd_
);
// Function under test, taken from test parameter 0.
487 HighbdConvolveFunc tst_fun_
;
// Bit depths exercised by the high bit depth tests.
490 const int kBDs
[] = { 8, 10, 12 };
// Correctness test.
492 TEST_P(HighBDConvolveScaleTest
, Check
) { Run(); }
// Speed comparison; the DISABLED_ prefix keeps it out of default runs.
493 TEST_P(HighBDConvolveScaleTest
, DISABLED_Speed
) { SpeedTest(); }
// Instantiates the high bit depth tests for the SSE4.1 implementation over
// every block size, every combination of horizontal/vertical tap counts, both
// values of the averaging flag and every bit depth in kBDs.
495 INSTANTIATE_TEST_CASE_P(
496 SSE4_1
, HighBDConvolveScaleTest
,
497 ::testing::Combine(::testing::Values(av1_highbd_convolve_2d_scale_sse4_1
),
498 ::testing::ValuesIn(kBlockDim
),
499 ::testing::ValuesIn(kNTaps
), ::testing::ValuesIn(kNTaps
),
500 ::testing::Bool(), ::testing::ValuesIn(kBDs
)));