/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
#include "test/hiprec_convolve_test_util.h"

#include <vector>

#include "av1/common/restoration.h"
16 using ::testing::make_tuple
;
17 using ::testing::tuple
;
19 namespace libaom_test
{
21 // Generate a random pair of filter kernels, using the ranges
22 // of possible values from the loop-restoration experiment
23 static void generate_kernels(ACMRandom
*rnd
, InterpKernel hkernel
,
24 InterpKernel vkernel
) {
25 hkernel
[0] = hkernel
[6] =
26 WIENER_FILT_TAP0_MINV
+
27 rnd
->PseudoUniform(WIENER_FILT_TAP0_MAXV
+ 1 - WIENER_FILT_TAP0_MINV
);
28 hkernel
[1] = hkernel
[5] =
29 WIENER_FILT_TAP1_MINV
+
30 rnd
->PseudoUniform(WIENER_FILT_TAP1_MAXV
+ 1 - WIENER_FILT_TAP1_MINV
);
31 hkernel
[2] = hkernel
[4] =
32 WIENER_FILT_TAP2_MINV
+
33 rnd
->PseudoUniform(WIENER_FILT_TAP2_MAXV
+ 1 - WIENER_FILT_TAP2_MINV
);
34 hkernel
[3] = -(hkernel
[0] + hkernel
[1] + hkernel
[2]);
37 vkernel
[0] = vkernel
[6] =
38 WIENER_FILT_TAP0_MINV
+
39 rnd
->PseudoUniform(WIENER_FILT_TAP0_MAXV
+ 1 - WIENER_FILT_TAP0_MINV
);
40 vkernel
[1] = vkernel
[5] =
41 WIENER_FILT_TAP1_MINV
+
42 rnd
->PseudoUniform(WIENER_FILT_TAP1_MAXV
+ 1 - WIENER_FILT_TAP1_MINV
);
43 vkernel
[2] = vkernel
[4] =
44 WIENER_FILT_TAP2_MINV
+
45 rnd
->PseudoUniform(WIENER_FILT_TAP2_MAXV
+ 1 - WIENER_FILT_TAP2_MINV
);
46 vkernel
[3] = -(vkernel
[0] + vkernel
[1] + vkernel
[2]);
50 namespace AV1HiprecConvolve
{
52 ::testing::internal::ParamGenerator
<HiprecConvolveParam
> BuildParams(
53 hiprec_convolve_func filter
) {
54 const HiprecConvolveParam params
[] = {
55 make_tuple(8, 8, 50000, filter
), make_tuple(8, 4, 50000, filter
),
56 make_tuple(64, 24, 1000, filter
), make_tuple(64, 64, 1000, filter
),
57 make_tuple(64, 56, 1000, filter
), make_tuple(32, 8, 10000, filter
),
58 make_tuple(32, 28, 10000, filter
), make_tuple(32, 32, 10000, filter
),
59 make_tuple(16, 34, 10000, filter
), make_tuple(32, 34, 10000, filter
),
60 make_tuple(64, 34, 1000, filter
), make_tuple(8, 17, 10000, filter
),
61 make_tuple(16, 17, 10000, filter
), make_tuple(32, 17, 10000, filter
)
63 return ::testing::ValuesIn(params
);
66 AV1HiprecConvolveTest::~AV1HiprecConvolveTest() {}
67 void AV1HiprecConvolveTest::SetUp() {
68 rnd_
.Reset(ACMRandom::DeterministicSeed());
71 void AV1HiprecConvolveTest::TearDown() { libaom_test::ClearSystemState(); }
73 void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl
) {
74 const int w
= 128, h
= 128;
75 const int out_w
= GET_PARAM(0), out_h
= GET_PARAM(1);
76 const int num_iters
= GET_PARAM(2);
78 const ConvolveParams conv_params
= get_conv_params_wiener(8);
80 uint8_t *input_
= new uint8_t[h
* w
];
81 uint8_t *input
= input_
;
83 // The AVX2 convolve functions always write rows with widths that are
84 // multiples of 16. So to avoid a buffer overflow, we may need to pad
85 // rows to a multiple of 16.
86 int output_n
= ALIGN_POWER_OF_TWO(out_w
, 4) * out_h
;
87 uint8_t *output
= new uint8_t[output_n
];
88 uint8_t *output2
= new uint8_t[output_n
];
90 // Generate random filter kernels
91 DECLARE_ALIGNED(16, InterpKernel
, hkernel
);
92 DECLARE_ALIGNED(16, InterpKernel
, vkernel
);
94 generate_kernels(&rnd_
, hkernel
, vkernel
);
96 for (i
= 0; i
< h
; ++i
)
97 for (j
= 0; j
< w
; ++j
) input
[i
* w
+ j
] = rnd_
.Rand8();
99 for (i
= 0; i
< num_iters
; ++i
) {
100 // Choose random locations within the source block
101 int offset_r
= 3 + rnd_
.PseudoUniform(h
- out_h
- 7);
102 int offset_c
= 3 + rnd_
.PseudoUniform(w
- out_w
- 7);
103 av1_wiener_convolve_add_src_c(input
+ offset_r
* w
+ offset_c
, w
, output
,
104 out_w
, hkernel
, 16, vkernel
, 16, out_w
, out_h
,
106 test_impl(input
+ offset_r
* w
+ offset_c
, w
, output2
, out_w
, hkernel
, 16,
107 vkernel
, 16, out_w
, out_h
, &conv_params
);
109 for (j
= 0; j
< out_w
* out_h
; ++j
)
110 ASSERT_EQ(output
[j
], output2
[j
])
111 << "Pixel mismatch at index " << j
<< " = (" << (j
% out_w
) << ", "
112 << (j
/ out_w
) << ") on iteration " << i
;
119 void AV1HiprecConvolveTest::RunSpeedTest(hiprec_convolve_func test_impl
) {
120 const int w
= 128, h
= 128;
121 const int out_w
= GET_PARAM(0), out_h
= GET_PARAM(1);
122 const int num_iters
= GET_PARAM(2) / 500;
124 const ConvolveParams conv_params
= get_conv_params_wiener(8);
126 uint8_t *input_
= new uint8_t[h
* w
];
127 uint8_t *input
= input_
;
129 // The AVX2 convolve functions always write rows with widths that are
130 // multiples of 16. So to avoid a buffer overflow, we may need to pad
131 // rows to a multiple of 16.
132 int output_n
= ALIGN_POWER_OF_TWO(out_w
, 4) * out_h
;
133 uint8_t *output
= new uint8_t[output_n
];
134 uint8_t *output2
= new uint8_t[output_n
];
136 // Generate random filter kernels
137 DECLARE_ALIGNED(16, InterpKernel
, hkernel
);
138 DECLARE_ALIGNED(16, InterpKernel
, vkernel
);
140 generate_kernels(&rnd_
, hkernel
, vkernel
);
142 for (i
= 0; i
< h
; ++i
)
143 for (j
= 0; j
< w
; ++j
) input
[i
* w
+ j
] = rnd_
.Rand8();
145 aom_usec_timer ref_timer
;
146 aom_usec_timer_start(&ref_timer
);
147 for (i
= 0; i
< num_iters
; ++i
) {
148 for (j
= 3; j
< h
- out_h
- 4; j
++) {
149 for (k
= 3; k
< w
- out_w
- 4; k
++) {
150 av1_wiener_convolve_add_src_c(input
+ j
* w
+ k
, w
, output
, out_w
,
151 hkernel
, 16, vkernel
, 16, out_w
, out_h
,
156 aom_usec_timer_mark(&ref_timer
);
157 const int64_t ref_time
= aom_usec_timer_elapsed(&ref_timer
);
159 aom_usec_timer tst_timer
;
160 aom_usec_timer_start(&tst_timer
);
161 for (i
= 0; i
< num_iters
; ++i
) {
162 for (j
= 3; j
< h
- out_h
- 4; j
++) {
163 for (k
= 3; k
< w
- out_w
- 4; k
++) {
164 test_impl(input
+ j
* w
+ k
, w
, output2
, out_w
, hkernel
, 16, vkernel
,
165 16, out_w
, out_h
, &conv_params
);
169 aom_usec_timer_mark(&tst_timer
);
170 const int64_t tst_time
= aom_usec_timer_elapsed(&tst_timer
);
172 std::cout
<< "[ ] C time = " << ref_time
/ 1000
173 << " ms, SIMD time = " << tst_time
/ 1000 << " ms\n";
175 EXPECT_GT(ref_time
, tst_time
)
176 << "Error: AV1HiprecConvolveTest.SpeedTest, SIMD slower than C.\n"
177 << "C time: " << ref_time
<< " us\n"
178 << "SIMD time: " << tst_time
<< " us\n";
184 } // namespace AV1HiprecConvolve
186 namespace AV1HighbdHiprecConvolve
{
188 ::testing::internal::ParamGenerator
<HighbdHiprecConvolveParam
> BuildParams(
189 highbd_hiprec_convolve_func filter
) {
190 const HighbdHiprecConvolveParam params
[] = {
191 make_tuple(8, 8, 50000, 8, filter
), make_tuple(64, 64, 1000, 8, filter
),
192 make_tuple(32, 8, 10000, 8, filter
), make_tuple(8, 8, 50000, 10, filter
),
193 make_tuple(64, 64, 1000, 10, filter
), make_tuple(32, 8, 10000, 10, filter
),
194 make_tuple(8, 8, 50000, 12, filter
), make_tuple(64, 64, 1000, 12, filter
),
195 make_tuple(32, 8, 10000, 12, filter
),
197 return ::testing::ValuesIn(params
);
200 AV1HighbdHiprecConvolveTest::~AV1HighbdHiprecConvolveTest() {}
201 void AV1HighbdHiprecConvolveTest::SetUp() {
202 rnd_
.Reset(ACMRandom::DeterministicSeed());
205 void AV1HighbdHiprecConvolveTest::TearDown() {
206 libaom_test::ClearSystemState();
209 void AV1HighbdHiprecConvolveTest::RunCheckOutput(
210 highbd_hiprec_convolve_func test_impl
) {
211 const int w
= 128, h
= 128;
212 const int out_w
= GET_PARAM(0), out_h
= GET_PARAM(1);
213 const int num_iters
= GET_PARAM(2);
214 const int bd
= GET_PARAM(3);
216 const ConvolveParams conv_params
= get_conv_params_wiener(bd
);
218 uint16_t *input
= new uint16_t[h
* w
];
220 // The AVX2 convolve functions always write rows with widths that are
221 // multiples of 16. So to avoid a buffer overflow, we may need to pad
222 // rows to a multiple of 16.
223 int output_n
= ALIGN_POWER_OF_TWO(out_w
, 4) * out_h
;
224 uint16_t *output
= new uint16_t[output_n
];
225 uint16_t *output2
= new uint16_t[output_n
];
227 // Generate random filter kernels
228 DECLARE_ALIGNED(16, InterpKernel
, hkernel
);
229 DECLARE_ALIGNED(16, InterpKernel
, vkernel
);
231 generate_kernels(&rnd_
, hkernel
, vkernel
);
233 for (i
= 0; i
< h
; ++i
)
234 for (j
= 0; j
< w
; ++j
) input
[i
* w
+ j
] = rnd_
.Rand16() & ((1 << bd
) - 1);
236 uint8_t *input_ptr
= CONVERT_TO_BYTEPTR(input
);
237 uint8_t *output_ptr
= CONVERT_TO_BYTEPTR(output
);
238 uint8_t *output2_ptr
= CONVERT_TO_BYTEPTR(output2
);
240 for (i
= 0; i
< num_iters
; ++i
) {
241 // Choose random locations within the source block
242 int offset_r
= 3 + rnd_
.PseudoUniform(h
- out_h
- 7);
243 int offset_c
= 3 + rnd_
.PseudoUniform(w
- out_w
- 7);
244 av1_highbd_wiener_convolve_add_src_c(
245 input_ptr
+ offset_r
* w
+ offset_c
, w
, output_ptr
, out_w
, hkernel
, 16,
246 vkernel
, 16, out_w
, out_h
, &conv_params
, bd
);
247 test_impl(input_ptr
+ offset_r
* w
+ offset_c
, w
, output2_ptr
, out_w
,
248 hkernel
, 16, vkernel
, 16, out_w
, out_h
, &conv_params
, bd
);
250 for (j
= 0; j
< out_w
* out_h
; ++j
)
251 ASSERT_EQ(output
[j
], output2
[j
])
252 << "Pixel mismatch at index " << j
<< " = (" << (j
% out_w
) << ", "
253 << (j
/ out_w
) << ") on iteration " << i
;
260 void AV1HighbdHiprecConvolveTest::RunSpeedTest(
261 highbd_hiprec_convolve_func test_impl
) {
262 const int w
= 128, h
= 128;
263 const int out_w
= GET_PARAM(0), out_h
= GET_PARAM(1);
264 const int num_iters
= GET_PARAM(2) / 500;
265 const int bd
= GET_PARAM(3);
267 const ConvolveParams conv_params
= get_conv_params_wiener(bd
);
269 uint16_t *input
= new uint16_t[h
* w
];
271 // The AVX2 convolve functions always write rows with widths that are
272 // multiples of 16. So to avoid a buffer overflow, we may need to pad
273 // rows to a multiple of 16.
274 int output_n
= ALIGN_POWER_OF_TWO(out_w
, 4) * out_h
;
275 uint16_t *output
= new uint16_t[output_n
];
276 uint16_t *output2
= new uint16_t[output_n
];
278 // Generate random filter kernels
279 DECLARE_ALIGNED(16, InterpKernel
, hkernel
);
280 DECLARE_ALIGNED(16, InterpKernel
, vkernel
);
282 generate_kernels(&rnd_
, hkernel
, vkernel
);
284 for (i
= 0; i
< h
; ++i
)
285 for (j
= 0; j
< w
; ++j
) input
[i
* w
+ j
] = rnd_
.Rand16() & ((1 << bd
) - 1);
287 uint8_t *input_ptr
= CONVERT_TO_BYTEPTR(input
);
288 uint8_t *output_ptr
= CONVERT_TO_BYTEPTR(output
);
289 uint8_t *output2_ptr
= CONVERT_TO_BYTEPTR(output2
);
291 aom_usec_timer ref_timer
;
292 aom_usec_timer_start(&ref_timer
);
293 for (i
= 0; i
< num_iters
; ++i
) {
294 for (j
= 3; j
< h
- out_h
- 4; j
++) {
295 for (k
= 3; k
< w
- out_w
- 4; k
++) {
296 av1_highbd_wiener_convolve_add_src_c(
297 input_ptr
+ j
* w
+ k
, w
, output_ptr
, out_w
, hkernel
, 16, vkernel
,
298 16, out_w
, out_h
, &conv_params
, bd
);
302 aom_usec_timer_mark(&ref_timer
);
303 const int64_t ref_time
= aom_usec_timer_elapsed(&ref_timer
);
305 aom_usec_timer tst_timer
;
306 aom_usec_timer_start(&tst_timer
);
307 for (i
= 0; i
< num_iters
; ++i
) {
308 for (j
= 3; j
< h
- out_h
- 4; j
++) {
309 for (k
= 3; k
< w
- out_w
- 4; k
++) {
310 test_impl(input_ptr
+ j
* w
+ k
, w
, output2_ptr
, out_w
, hkernel
, 16,
311 vkernel
, 16, out_w
, out_h
, &conv_params
, bd
);
315 aom_usec_timer_mark(&tst_timer
);
316 const int64_t tst_time
= aom_usec_timer_elapsed(&tst_timer
);
318 std::cout
<< "[ ] C time = " << ref_time
/ 1000
319 << " ms, SIMD time = " << tst_time
/ 1000 << " ms\n";
321 EXPECT_GT(ref_time
, tst_time
)
322 << "Error: AV1HighbdHiprecConvolveTest.SpeedTest, SIMD slower than C.\n"
323 << "C time: " << ref_time
<< " us\n"
324 << "SIMD time: " << tst_time
<< " us\n";
330 } // namespace AV1HighbdHiprecConvolve
331 } // namespace libaom_test