/*
 * Copyright (c) 2017, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
#include <cassert>
#include <cstring>
#include <iostream>
#include <vector>

#include "third_party/googletest/src/googletest/include/gtest/gtest.h"

#include "./av1_rtcd.h"
#include "aom_ports/aom_timer.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"

#include "av1/common/common_data.h"
// Number of randomised iterations run by the correctness test.
const int kTestIters = 10;
// Number of back-to-back calls timed for each implementation by the speed
// test.
const int kPerfIters = 1000;

// Step sizes handed to the scaled convolve functions as x_step_qn and
// y_step_qn respectively.
const int kXStepQn = 16;
const int kYStepQn = 20;
34 using libaom_test::ACMRandom
;
35 using std::tr1::make_tuple
;
36 using std::tr1::tuple
;
// Supported filter lengths. The enumerators are consecutive (0, 1, 2) so that
// the tap count can be derived arithmetically by NTapsToInt.
enum NTaps { EIGHT_TAP, TEN_TAP, TWELVE_TAP };

// Returns the number of taps (8, 10 or 12) denoted by |ntaps|.
int NTapsToInt(NTaps ntaps) { return 8 + static_cast<int>(ntaps) * 2; }
41 // A 16-bit filter with a configurable number of taps.
44 void set(NTaps ntaps
, bool backwards
);
46 InterpFilterParams params_
;
49 std::vector
<int16_t> coeffs_
;
52 void TestFilter::set(NTaps ntaps
, bool backwards
) {
53 const int n
= NTapsToInt(ntaps
);
54 assert(n
>= 8 && n
<= 12);
56 // The filter has n * SUBPEL_SHIFTS proper elements and an extra 8 bogus
57 // elements at the end so that convolutions can read off the end safely.
58 coeffs_
.resize(n
* SUBPEL_SHIFTS
+ 8);
60 // The coefficients are pretty much arbitrary, but convolutions shouldn't
61 // over or underflow. For the first filter (subpels = 0), we use an
62 // increasing or decreasing ramp (depending on the backwards parameter). We
63 // don't want any zero coefficients, so we make it have an x-intercept at -1
64 // or n. To ensure absence of under/overflow, we normalise the area under the
65 // ramp to be I = 1 << FILTER_BITS (so that convolving a constant function
66 // gives the identity).
68 // When increasing, the function has the form:
72 // Summing and rearranging for A gives A = 2 * I / (n * (n + 1)). If the
73 // filter is reversed, we have the same A but with formula
76 const int I
= 1 << FILTER_BITS
;
77 const float A
= 2.f
* I
/ (n
* (n
+ 1.f
));
78 for (int i
= 0; i
< n
; ++i
) {
79 coeffs_
[i
] = static_cast<int16_t>(A
* (backwards
? (n
- i
) : (i
+ 1)));
82 // For the other filters, make them slightly different by swapping two
83 // columns. Filter k will have the columns (k % n) and (7 * k) % n swapped.
84 const size_t filter_size
= sizeof(coeffs_
[0] * n
);
85 int16_t *const filter0
= &coeffs_
[0];
86 for (int k
= 1; k
< SUBPEL_SHIFTS
; ++k
) {
87 int16_t *filterk
= &coeffs_
[k
* n
];
88 memcpy(filterk
, filter0
, filter_size
);
90 const int idx0
= k
% n
;
91 const int idx1
= (7 * k
) % n
;
93 const int16_t tmp
= filterk
[idx0
];
94 filterk
[idx0
] = filterk
[idx1
];
98 // Finally, write some rubbish at the end to make sure we don't use it.
99 for (int i
= 0; i
< 8; ++i
) coeffs_
[n
* SUBPEL_SHIFTS
+ i
] = 123 + i
;
102 params_
.filter_ptr
= &coeffs_
[0];
104 // These are ignored by the functions being tested. Set them to whatever.
105 params_
.subpel_shifts
= SUBPEL_SHIFTS
;
106 params_
.interp_filter
= EIGHTTAP_REGULAR
;
109 template <typename SrcPixel
>
112 TestImage(int w
, int h
, int bd
) : w_(w
), h_(h
), bd_(bd
) {
114 assert(bd
<= 8 * static_cast<int>(sizeof(SrcPixel
)));
116 // Pad width by 2*kHPad and then round up to the next multiple of 16
117 // to get src_stride_. Add another 16 for dst_stride_ (to make sure
118 // something goes wrong if we use the wrong one)
119 src_stride_
= (w_
+ 2 * kHPad
+ 15) & ~15;
120 dst_stride_
= src_stride_
+ 16;
122 // Allocate image data
123 src_data_
.resize(2 * src_block_size());
124 dst_data_
.resize(2 * dst_block_size());
127 void Initialize(ACMRandom
*rnd
);
130 int src_stride() const { return src_stride_
; }
131 int dst_stride() const { return dst_stride_
; }
133 int src_block_size() const { return (h_
+ 2 * kVPad
) * src_stride(); }
134 int dst_block_size() const { return (h_
+ 2 * kVPad
) * dst_stride(); }
136 const SrcPixel
*GetSrcData(bool ref
, bool borders
) const {
137 const SrcPixel
*block
= &src_data_
[ref
? 0 : src_block_size()];
138 return borders
? block
: block
+ kHPad
+ src_stride_
* kVPad
;
141 int32_t *GetDstData(bool ref
, bool borders
) {
142 int32_t *block
= &dst_data_
[ref
? 0 : dst_block_size()];
143 return borders
? block
: block
+ kHPad
+ dst_stride_
* kVPad
;
148 int src_stride_
, dst_stride_
;
150 std::vector
<SrcPixel
> src_data_
;
151 std::vector
<int32_t> dst_data_
;
154 template <typename Pixel
>
155 void FillEdge(ACMRandom
*rnd
, int num_pixels
, int bd
, bool trash
, Pixel
*data
) {
157 memset(data
, 0, sizeof(*data
) * num_pixels
);
160 const Pixel mask
= (1 << bd
) - 1;
161 for (int i
= 0; i
< num_pixels
; ++i
) data
[i
] = rnd
->Rand16() & mask
;
164 template <typename Pixel
>
165 void PrepBuffers(ACMRandom
*rnd
, int w
, int h
, int stride
, int bd
,
166 bool trash_edges
, Pixel
*data
) {
168 const Pixel mask
= (1 << bd
) - 1;
170 // Fill in the first buffer with random data
172 FillEdge(rnd
, stride
* kVPad
, bd
, trash_edges
, data
);
173 for (int r
= 0; r
< h
; ++r
) {
174 Pixel
*row_data
= data
+ (kVPad
+ r
) * stride
;
175 // Left border, contents, right border
176 FillEdge(rnd
, kHPad
, bd
, trash_edges
, row_data
);
177 for (int c
= 0; c
< w
; ++c
) row_data
[kHPad
+ c
] = rnd
->Rand16() & mask
;
178 FillEdge(rnd
, kHPad
, bd
, trash_edges
, row_data
+ kHPad
+ w
);
181 FillEdge(rnd
, stride
* kVPad
, bd
, trash_edges
, data
+ stride
* (kVPad
+ h
));
183 const int bpp
= sizeof(*data
);
184 const int block_elts
= stride
* (h
+ 2 * kVPad
);
185 const int block_size
= bpp
* block_elts
;
187 // Now copy that to the second buffer
188 memcpy(data
+ block_elts
, data
, block_size
);
191 template <typename SrcPixel
>
192 void TestImage
<SrcPixel
>::Initialize(ACMRandom
*rnd
) {
193 PrepBuffers(rnd
, w_
, h_
, src_stride_
, bd_
, false, &src_data_
[0]);
194 PrepBuffers(rnd
, w_
, h_
, dst_stride_
, bd_
, true, &dst_data_
[0]);
197 template <typename SrcPixel
>
198 void TestImage
<SrcPixel
>::Check() const {
199 // If memcmp returns 0, there's nothing to do.
200 const int num_pixels
= dst_block_size();
201 const int32_t *ref_dst
= &dst_data_
[0];
202 const int32_t *tst_dst
= &dst_data_
[num_pixels
];
204 if (0 == memcmp(ref_dst
, tst_dst
, sizeof(*ref_dst
) * num_pixels
)) return;
206 // Otherwise, iterate through the buffer looking for differences (including
208 const int stride
= dst_stride_
;
209 for (int r
= 0; r
< h_
+ 2 * kVPad
; ++r
) {
210 for (int c
= 0; c
< w_
+ 2 * kHPad
; ++c
) {
211 const int32_t ref_value
= ref_dst
[r
* stride
+ c
];
212 const int32_t tst_value
= tst_dst
[r
* stride
+ c
];
214 EXPECT_EQ(tst_value
, ref_value
)
215 << "Error at row: " << (r
- kVPad
) << ", col: " << (c
- kHPad
);
220 typedef tuple
<int, int> BlockDimension
;
223 BaseParams(BlockDimension dims
, NTaps ntaps_x
, NTaps ntaps_y
, bool avg
)
224 : dims(dims
), ntaps_x(ntaps_x
), ntaps_y(ntaps_y
), avg(avg
) {}
227 NTaps ntaps_x
, ntaps_y
;
231 template <typename SrcPixel
>
232 class ConvolveScaleTestBase
: public ::testing::Test
{
234 ConvolveScaleTestBase() : image_(NULL
) {}
235 virtual ~ConvolveScaleTestBase() { delete image_
; }
236 virtual void TearDown() { libaom_test::ClearSystemState(); }
238 // Implemented by subclasses (SetUp depends on the parameters passed
239 // in and RunOne depends on the function to be tested. These can't
240 // be templated for low/high bit depths because they have different
241 // numbers of parameters)
242 virtual void SetUp() = 0;
243 virtual void RunOne(bool ref
) = 0;
246 void SetParams(const BaseParams
¶ms
, int bd
) {
247 width_
= std::tr1::get
<0>(params
.dims
);
248 height_
= std::tr1::get
<1>(params
.dims
);
249 ntaps_x_
= params
.ntaps_x
;
250 ntaps_y_
= params
.ntaps_y
;
254 filter_x_
.set(ntaps_x_
, false);
255 filter_y_
.set(ntaps_y_
, true);
257 get_conv_params_no_round(0, avg_
!= false, 0, NULL
, 0, 1, bd
);
260 image_
= new TestImage
<SrcPixel
>(width_
, height_
, bd_
);
263 void SetConvParamOffset(int i
, int j
) {
264 if (i
== -1 && j
== -1) {
265 convolve_params_
.use_jnt_comp_avg
= 0;
267 convolve_params_
.use_jnt_comp_avg
= 1;
268 convolve_params_
.fwd_offset
= quant_dist_lookup_table
[i
][j
][0];
269 convolve_params_
.bck_offset
= quant_dist_lookup_table
[i
][j
][1];
274 ACMRandom
rnd(ACMRandom::DeterministicSeed());
275 for (int i
= 0; i
< kTestIters
; ++i
) {
276 SetConvParamOffset(-1, -1);
282 for (int j
= 0; j
< 2; ++j
) {
283 for (int k
= 0; k
< 4; ++k
) {
284 SetConvParamOffset(j
, k
);
295 ACMRandom
rnd(ACMRandom::DeterministicSeed());
298 aom_usec_timer ref_timer
;
299 aom_usec_timer_start(&ref_timer
);
300 for (int i
= 0; i
< kPerfIters
; ++i
) RunOne(true);
301 aom_usec_timer_mark(&ref_timer
);
302 const int64_t ref_time
= aom_usec_timer_elapsed(&ref_timer
);
304 aom_usec_timer tst_timer
;
305 aom_usec_timer_start(&tst_timer
);
306 for (int i
= 0; i
< kPerfIters
; ++i
) RunOne(false);
307 aom_usec_timer_mark(&tst_timer
);
308 const int64_t tst_time
= aom_usec_timer_elapsed(&tst_timer
);
310 std::cout
<< "[ ] C time = " << ref_time
/ 1000
311 << " ms, SIMD time = " << tst_time
/ 1000 << " ms\n";
313 EXPECT_GT(ref_time
, tst_time
)
314 << "Error: CDEFSpeedTest, SIMD slower than C.\n"
315 << "C time: " << ref_time
<< " us\n"
316 << "SIMD time: " << tst_time
<< " us\n";
319 static int RandomSubpel(ACMRandom
*rnd
) {
320 const uint8_t subpel_mode
= rnd
->Rand8();
321 if ((subpel_mode
& 7) == 0) {
323 } else if ((subpel_mode
& 7) == 1) {
324 return SCALE_SUBPEL_SHIFTS
- 1;
326 return 1 + rnd
->PseudoUniform(SCALE_SUBPEL_SHIFTS
- 2);
330 void Prep(ACMRandom
*rnd
) {
333 // Choose subpel_x_ and subpel_y_. They should be less than
334 // SCALE_SUBPEL_SHIFTS; we also want to add extra weight to "interesting"
335 // values: 0 and SCALE_SUBPEL_SHIFTS - 1
336 subpel_x_
= RandomSubpel(rnd
);
337 subpel_y_
= RandomSubpel(rnd
);
339 image_
->Initialize(rnd
);
342 int width_
, height_
, bd_
;
343 NTaps ntaps_x_
, ntaps_y_
;
345 int subpel_x_
, subpel_y_
;
346 TestFilter filter_x_
, filter_y_
;
347 TestImage
<SrcPixel
> *image_
;
348 ConvolveParams convolve_params_
;
351 typedef tuple
<int, int> BlockDimension
;
353 typedef void (*LowbdConvolveFunc
)(const uint8_t *src
, int src_stride
,
354 int32_t *dst
, int dst_stride
, int w
, int h
,
355 InterpFilterParams
*filter_params_x
,
356 InterpFilterParams
*filter_params_y
,
357 const int subpel_x_qn
, const int x_step_qn
,
358 const int subpel_y_qn
, const int y_step_qn
,
359 ConvolveParams
*conv_params
);
361 // Test parameter list:
362 // <tst_fun, dims, ntaps_x, ntaps_y, avg>
363 typedef tuple
<LowbdConvolveFunc
, BlockDimension
, NTaps
, NTaps
, bool>
366 class LowBDConvolveScaleTest
367 : public ConvolveScaleTestBase
<uint8_t>,
368 public ::testing::WithParamInterface
<LowBDParams
> {
370 virtual ~LowBDConvolveScaleTest() {}
373 tst_fun_
= GET_PARAM(0);
375 const BlockDimension
&block
= GET_PARAM(1);
376 const NTaps ntaps_x
= GET_PARAM(2);
377 const NTaps ntaps_y
= GET_PARAM(3);
379 const bool avg
= GET_PARAM(4);
381 SetParams(BaseParams(block
, ntaps_x
, ntaps_y
, avg
), bd
);
384 void RunOne(bool ref
) {
385 const uint8_t *src
= image_
->GetSrcData(ref
, false);
386 CONV_BUF_TYPE
*dst
= image_
->GetDstData(ref
, false);
387 const int src_stride
= image_
->src_stride();
388 const int dst_stride
= image_
->dst_stride();
391 av1_convolve_2d_scale_c(src
, src_stride
, dst
, dst_stride
, width_
, height_
,
392 &filter_x_
.params_
, &filter_y_
.params_
, subpel_x_
,
393 kXStepQn
, subpel_y_
, kYStepQn
, &convolve_params_
);
395 tst_fun_(src
, src_stride
, dst
, dst_stride
, width_
, height_
,
396 &filter_x_
.params_
, &filter_y_
.params_
, subpel_x_
, kXStepQn
,
397 subpel_y_
, kYStepQn
, &convolve_params_
);
402 LowbdConvolveFunc tst_fun_
;
405 const BlockDimension kBlockDim
[] = {
406 make_tuple(2, 2), make_tuple(2, 4), make_tuple(4, 4),
407 make_tuple(4, 8), make_tuple(8, 4), make_tuple(8, 8),
408 make_tuple(8, 16), make_tuple(16, 8), make_tuple(16, 16),
409 make_tuple(16, 32), make_tuple(32, 16), make_tuple(32, 32),
410 make_tuple(32, 64), make_tuple(64, 32), make_tuple(64, 64),
411 make_tuple(64, 128), make_tuple(128, 64), make_tuple(128, 128),
414 const NTaps kNTaps
[] = { EIGHT_TAP
, TEN_TAP
, TWELVE_TAP
};
416 TEST_P(LowBDConvolveScaleTest
, Check
) { Run(); }
417 TEST_P(LowBDConvolveScaleTest
, DISABLED_Speed
) { SpeedTest(); }
419 INSTANTIATE_TEST_CASE_P(
420 SSE4_1
, LowBDConvolveScaleTest
,
421 ::testing::Combine(::testing::Values(av1_convolve_2d_scale_sse4_1
),
422 ::testing::ValuesIn(kBlockDim
),
423 ::testing::ValuesIn(kNTaps
), ::testing::ValuesIn(kNTaps
),
426 typedef void (*HighbdConvolveFunc
)(const uint16_t *src
, int src_stride
,
427 int32_t *dst
, int dst_stride
, int w
, int h
,
428 InterpFilterParams
*filter_params_x
,
429 InterpFilterParams
*filter_params_y
,
430 const int subpel_x_qn
, const int x_step_qn
,
431 const int subpel_y_qn
, const int y_step_qn
,
432 ConvolveParams
*conv_params
, int bd
);
434 // Test parameter list:
435 // <tst_fun, dims, ntaps_x, ntaps_y, avg, bd>
436 typedef tuple
<HighbdConvolveFunc
, BlockDimension
, NTaps
, NTaps
, bool, int>
439 class HighBDConvolveScaleTest
440 : public ConvolveScaleTestBase
<uint16_t>,
441 public ::testing::WithParamInterface
<HighBDParams
> {
443 virtual ~HighBDConvolveScaleTest() {}
446 tst_fun_
= GET_PARAM(0);
448 const BlockDimension
&block
= GET_PARAM(1);
449 const NTaps ntaps_x
= GET_PARAM(2);
450 const NTaps ntaps_y
= GET_PARAM(3);
451 const bool avg
= GET_PARAM(4);
452 const int bd
= GET_PARAM(5);
454 SetParams(BaseParams(block
, ntaps_x
, ntaps_y
, avg
), bd
);
457 void RunOne(bool ref
) {
458 const uint16_t *src
= image_
->GetSrcData(ref
, false);
459 CONV_BUF_TYPE
*dst
= image_
->GetDstData(ref
, false);
460 const int src_stride
= image_
->src_stride();
461 const int dst_stride
= image_
->dst_stride();
464 av1_highbd_convolve_2d_scale_c(
465 src
, src_stride
, dst
, dst_stride
, width_
, height_
, &filter_x_
.params_
,
466 &filter_y_
.params_
, subpel_x_
, kXStepQn
, subpel_y_
, kYStepQn
,
467 &convolve_params_
, bd_
);
469 tst_fun_(src
, src_stride
, dst
, dst_stride
, width_
, height_
,
470 &filter_x_
.params_
, &filter_y_
.params_
, subpel_x_
, kXStepQn
,
471 subpel_y_
, kYStepQn
, &convolve_params_
, bd_
);
476 HighbdConvolveFunc tst_fun_
;
// Bit depths exercised by the high bit-depth test.
const int kBDs[] = { 8, 10, 12 };
481 TEST_P(HighBDConvolveScaleTest
, Check
) { Run(); }
482 TEST_P(HighBDConvolveScaleTest
, DISABLED_Speed
) { SpeedTest(); }
484 INSTANTIATE_TEST_CASE_P(
485 SSE4_1
, HighBDConvolveScaleTest
,
486 ::testing::Combine(::testing::Values(av1_highbd_convolve_2d_scale_sse4_1
),
487 ::testing::ValuesIn(kBlockDim
),
488 ::testing::ValuesIn(kNTaps
), ::testing::ValuesIn(kNTaps
),
489 ::testing::Bool(), ::testing::ValuesIn(kBDs
)));