Add ssse3 aom_smooth_h_predictor_4xh
[aom.git] / test / warp_filter_test_util.cc
blob27299d906e1c6e9605288c5b2e77b1a380c353fa
1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
11 #include "aom_ports/aom_timer.h"
12 #include "test/warp_filter_test_util.h"
14 using std::tr1::make_tuple;
15 using std::tr1::tuple;
17 namespace libaom_test {
19 int32_t random_warped_param(libaom_test::ACMRandom *rnd, int bits) {
20 // 1 in 8 chance of generating zero (arbitrarily chosen)
21 if (((rnd->Rand8()) & 7) == 0) return 0;
22 // Otherwise, enerate uniform values in the range
23 // [-(1 << bits), 1] U [1, 1<<bits]
24 int32_t v = 1 + (rnd->Rand16() & ((1 << bits) - 1));
25 if ((rnd->Rand8()) & 1) return -v;
26 return v;
29 void generate_warped_model(libaom_test::ACMRandom *rnd, int32_t *mat,
30 int16_t *alpha, int16_t *beta, int16_t *gamma,
31 int16_t *delta) {
32 while (1) {
33 mat[0] = random_warped_param(rnd, WARPEDMODEL_PREC_BITS + 6);
34 mat[1] = random_warped_param(rnd, WARPEDMODEL_PREC_BITS + 6);
35 mat[2] = (random_warped_param(rnd, WARPEDMODEL_PREC_BITS - 3)) +
36 (1 << WARPEDMODEL_PREC_BITS);
37 mat[3] = random_warped_param(rnd, WARPEDMODEL_PREC_BITS - 3);
38 // 50/50 chance of generating ROTZOOM vs. AFFINE models
39 if (rnd->Rand8() & 1) {
40 // AFFINE
41 mat[4] = random_warped_param(rnd, WARPEDMODEL_PREC_BITS - 3);
42 mat[5] = (random_warped_param(rnd, WARPEDMODEL_PREC_BITS - 3)) +
43 (1 << WARPEDMODEL_PREC_BITS);
44 } else {
45 mat[4] = -mat[3];
46 mat[5] = mat[2];
49 // Calculate the derived parameters and check that they are suitable
50 // for the warp filter.
51 assert(mat[2] != 0);
53 *alpha = clamp(mat[2] - (1 << WARPEDMODEL_PREC_BITS), INT16_MIN, INT16_MAX);
54 *beta = clamp(mat[3], INT16_MIN, INT16_MAX);
55 *gamma = clamp(((int64_t)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) / mat[2],
56 INT16_MIN, INT16_MAX);
57 *delta =
58 clamp(mat[5] - (((int64_t)mat[3] * mat[4] + (mat[2] / 2)) / mat[2]) -
59 (1 << WARPEDMODEL_PREC_BITS),
60 INT16_MIN, INT16_MAX);
62 if ((4 * abs(*alpha) + 7 * abs(*beta) >= (1 << WARPEDMODEL_PREC_BITS)) ||
63 (4 * abs(*gamma) + 4 * abs(*delta) >= (1 << WARPEDMODEL_PREC_BITS)))
64 continue;
66 *alpha = ROUND_POWER_OF_TWO_SIGNED(*alpha, WARP_PARAM_REDUCE_BITS) *
67 (1 << WARP_PARAM_REDUCE_BITS);
68 *beta = ROUND_POWER_OF_TWO_SIGNED(*beta, WARP_PARAM_REDUCE_BITS) *
69 (1 << WARP_PARAM_REDUCE_BITS);
70 *gamma = ROUND_POWER_OF_TWO_SIGNED(*gamma, WARP_PARAM_REDUCE_BITS) *
71 (1 << WARP_PARAM_REDUCE_BITS);
72 *delta = ROUND_POWER_OF_TWO_SIGNED(*delta, WARP_PARAM_REDUCE_BITS) *
73 (1 << WARP_PARAM_REDUCE_BITS);
75 // We have a valid model, so finish
76 return;
80 namespace AV1WarpFilter {
81 #if CONFIG_LOWPRECISION_BLEND
82 ::testing::internal::ParamGenerator<WarpTestParam> BuildParams(
83 warp_affine_func filter) {
84 const WarpTestParam params[] = {
85 make_tuple(4, 4, 50000, filter), make_tuple(8, 8, 50000, filter),
86 make_tuple(64, 64, 1000, filter), make_tuple(4, 16, 20000, filter),
87 make_tuple(32, 8, 10000, filter),
89 return ::testing::ValuesIn(params);
92 AV1WarpFilterTest::~AV1WarpFilterTest() {}
93 void AV1WarpFilterTest::SetUp() { rnd_.Reset(ACMRandom::DeterministicSeed()); }
95 void AV1WarpFilterTest::TearDown() { libaom_test::ClearSystemState(); }
97 void AV1WarpFilterTest::RunSpeedTest(warp_affine_func test_impl) {
98 const int w = 128, h = 128;
99 const int border = 16;
100 const int stride = w + 2 * border;
101 const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
102 int sub_x, sub_y;
103 const int bd = 8;
105 uint8_t *input_ = new uint8_t[h * stride];
106 uint8_t *input = input_ + border;
108 // The warp functions always write rows with widths that are multiples of 8.
109 // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
110 int output_n = ((out_w + 7) & ~7) * out_h;
111 uint8_t *output = new uint8_t[output_n];
112 int32_t mat[8];
113 int16_t alpha, beta, gamma, delta;
114 ConvolveParams conv_params = get_conv_params(0, 0, 0, bd);
115 CONV_BUF_TYPE *dsta = new CONV_BUF_TYPE[output_n];
117 generate_warped_model(&rnd_, mat, &alpha, &beta, &gamma, &delta);
119 for (int r = 0; r < h; ++r)
120 for (int c = 0; c < w; ++c) input[r * stride + c] = rnd_.Rand8();
121 for (int r = 0; r < h; ++r) {
122 memset(input + r * stride - border, input[r * stride], border);
123 memset(input + r * stride + w, input[r * stride + (w - 1)], border);
126 sub_x = 0;
127 sub_y = 0;
128 int do_average = 0;
130 conv_params = get_conv_params_no_round(0, do_average, 0, dsta, out_w, 1, bd);
131 conv_params.use_jnt_comp_avg = 0;
133 const int num_loops = 1000000000 / (out_w + out_h);
134 aom_usec_timer timer;
135 aom_usec_timer_start(&timer);
136 for (int i = 0; i < num_loops; ++i)
137 test_impl(mat, input, w, h, stride, output, 32, 32, out_w, out_h, out_w,
138 sub_x, sub_y, &conv_params, alpha, beta, gamma, delta);
140 aom_usec_timer_mark(&timer);
141 const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
142 printf("warp %3dx%-3d: %7.2f ns\n", out_w, out_h,
143 1000.0 * elapsed_time / num_loops);
145 delete[] input_;
146 delete[] output;
147 delete[] dsta;
150 void AV1WarpFilterTest::RunCheckOutput(warp_affine_func test_impl) {
151 const int w = 128, h = 128;
152 const int border = 16;
153 const int stride = w + 2 * border;
154 const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
155 const int num_iters = GET_PARAM(2);
156 int i, j, sub_x, sub_y;
157 const int bd = 8;
159 // The warp functions always write rows with widths that are multiples of 8.
160 // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
161 int output_n = ((out_w + 7) & ~7) * out_h;
162 uint8_t *input_ = new uint8_t[h * stride];
163 uint8_t *input = input_ + border;
164 uint8_t *output = new uint8_t[output_n];
165 uint8_t *output2 = new uint8_t[output_n];
166 int32_t mat[8];
167 int16_t alpha, beta, gamma, delta;
168 ConvolveParams conv_params = get_conv_params(0, 0, 0, bd);
169 CONV_BUF_TYPE *dsta = new CONV_BUF_TYPE[output_n];
170 CONV_BUF_TYPE *dstb = new CONV_BUF_TYPE[output_n];
171 for (int i = 0; i < output_n; ++i) output[i] = output2[i] = rnd_.Rand8();
173 for (i = 0; i < num_iters; ++i) {
174 // Generate an input block and extend its borders horizontally
175 for (int r = 0; r < h; ++r)
176 for (int c = 0; c < w; ++c) input[r * stride + c] = rnd_.Rand8();
177 for (int r = 0; r < h; ++r) {
178 memset(input + r * stride - border, input[r * stride], border);
179 memset(input + r * stride + w, input[r * stride + (w - 1)], border);
181 const int use_no_round = rnd_.Rand8() & 1;
182 for (sub_x = 0; sub_x < 2; ++sub_x)
183 for (sub_y = 0; sub_y < 2; ++sub_y) {
184 generate_warped_model(&rnd_, mat, &alpha, &beta, &gamma, &delta);
185 for (int ii = 0; ii < 2; ++ii) {
186 for (int jj = 0; jj < 5; ++jj) {
187 for (int do_average = 0; do_average <= 1; ++do_average) {
188 if (use_no_round) {
189 conv_params = get_conv_params_no_round(0, do_average, 0, dsta,
190 out_w, 1, bd);
191 } else {
192 conv_params = get_conv_params(0, 0, 0, bd);
194 if (jj >= 4) {
195 conv_params.use_jnt_comp_avg = 0;
196 } else {
197 conv_params.use_jnt_comp_avg = 1;
198 conv_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
199 conv_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
201 av1_warp_affine_c(mat, input, w, h, stride, output, 32, 32, out_w,
202 out_h, out_w, sub_x, sub_y, &conv_params, alpha,
203 beta, gamma, delta);
204 if (use_no_round) {
205 conv_params = get_conv_params_no_round(0, do_average, 0, dstb,
206 out_w, 1, bd);
208 if (jj >= 4) {
209 conv_params.use_jnt_comp_avg = 0;
210 } else {
211 conv_params.use_jnt_comp_avg = 1;
212 conv_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
213 conv_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
215 test_impl(mat, input, w, h, stride, output2, 32, 32, out_w, out_h,
216 out_w, sub_x, sub_y, &conv_params, alpha, beta, gamma,
217 delta);
218 if (use_no_round) {
219 for (j = 0; j < out_w * out_h; ++j)
220 ASSERT_EQ(dsta[j], dstb[j])
221 << "Pixel mismatch at index " << j << " = ("
222 << (j % out_w) << ", " << (j / out_w) << ") on iteration "
223 << i;
224 for (j = 0; j < out_w * out_h; ++j)
225 ASSERT_EQ(output[j], output2[j])
226 << "Pixel mismatch at index " << j << " = ("
227 << (j % out_w) << ", " << (j / out_w) << ") on iteration "
228 << i;
229 } else {
230 for (j = 0; j < out_w * out_h; ++j)
231 ASSERT_EQ(output[j], output2[j])
232 << "Pixel mismatch at index " << j << " = ("
233 << (j % out_w) << ", " << (j / out_w) << ") on iteration "
234 << i;
241 delete[] input_;
242 delete[] output;
243 delete[] output2;
244 delete[] dsta;
245 delete[] dstb;
247 #endif
248 } // namespace AV1WarpFilter
250 namespace AV1HighbdWarpFilter {
251 #if CONFIG_LOWPRECISION_BLEND
252 ::testing::internal::ParamGenerator<HighbdWarpTestParam> BuildParams(
253 highbd_warp_affine_func filter) {
254 const HighbdWarpTestParam params[] = {
255 make_tuple(4, 4, 100, 8, filter), make_tuple(8, 8, 100, 8, filter),
256 make_tuple(64, 64, 100, 8, filter), make_tuple(4, 16, 100, 8, filter),
257 make_tuple(32, 8, 100, 8, filter), make_tuple(4, 4, 100, 10, filter),
258 make_tuple(8, 8, 100, 10, filter), make_tuple(64, 64, 100, 10, filter),
259 make_tuple(4, 16, 100, 10, filter), make_tuple(32, 8, 100, 10, filter),
260 make_tuple(4, 4, 100, 12, filter), make_tuple(8, 8, 100, 12, filter),
261 make_tuple(64, 64, 100, 12, filter), make_tuple(4, 16, 100, 12, filter),
262 make_tuple(32, 8, 100, 12, filter),
264 return ::testing::ValuesIn(params);
267 AV1HighbdWarpFilterTest::~AV1HighbdWarpFilterTest() {}
268 void AV1HighbdWarpFilterTest::SetUp() {
269 rnd_.Reset(ACMRandom::DeterministicSeed());
272 void AV1HighbdWarpFilterTest::TearDown() { libaom_test::ClearSystemState(); }
274 void AV1HighbdWarpFilterTest::RunSpeedTest(highbd_warp_affine_func test_impl) {
275 const int w = 128, h = 128;
276 const int border = 16;
277 const int stride = w + 2 * border;
278 const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
279 const int bd = GET_PARAM(3);
280 const int mask = (1 << bd) - 1;
281 int sub_x, sub_y;
283 // The warp functions always write rows with widths that are multiples of 8.
284 // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
285 int output_n = ((out_w + 7) & ~7) * out_h;
286 uint16_t *input_ = new uint16_t[h * stride];
287 uint16_t *input = input_ + border;
288 uint16_t *output = new uint16_t[output_n];
289 int32_t mat[8];
290 int16_t alpha, beta, gamma, delta;
291 ConvolveParams conv_params = get_conv_params(0, 0, 0, bd);
292 CONV_BUF_TYPE *dsta = new CONV_BUF_TYPE[output_n];
294 generate_warped_model(&rnd_, mat, &alpha, &beta, &gamma, &delta);
295 // Generate an input block and extend its borders horizontally
296 for (int r = 0; r < h; ++r)
297 for (int c = 0; c < w; ++c) input[r * stride + c] = rnd_.Rand16() & mask;
298 for (int r = 0; r < h; ++r) {
299 for (int c = 0; c < border; ++c) {
300 input[r * stride - border + c] = input[r * stride];
301 input[r * stride + w + c] = input[r * stride + (w - 1)];
305 sub_x = 0;
306 sub_y = 0;
307 int do_average = 0;
308 conv_params.use_jnt_comp_avg = 0;
309 conv_params = get_conv_params_no_round(0, do_average, 0, dsta, out_w, 1, bd);
311 const int num_loops = 1000000000 / (out_w + out_h);
312 aom_usec_timer timer;
313 aom_usec_timer_start(&timer);
315 for (int i = 0; i < num_loops; ++i)
316 test_impl(mat, input, w, h, stride, output, 32, 32, out_w, out_h, out_w,
317 sub_x, sub_y, bd, &conv_params, alpha, beta, gamma, delta);
319 aom_usec_timer_mark(&timer);
320 const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
321 printf("highbd warp %3dx%-3d: %7.2f ns\n", out_w, out_h,
322 1000.0 * elapsed_time / num_loops);
324 delete[] input_;
325 delete[] output;
326 delete[] dsta;
329 void AV1HighbdWarpFilterTest::RunCheckOutput(
330 highbd_warp_affine_func test_impl) {
331 const int w = 128, h = 128;
332 const int border = 16;
333 const int stride = w + 2 * border;
334 const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
335 const int num_iters = GET_PARAM(2);
336 const int bd = GET_PARAM(3);
337 const int mask = (1 << bd) - 1;
338 int i, j, sub_x, sub_y;
340 // The warp functions always write rows with widths that are multiples of 8.
341 // So to avoid a buffer overflow, we may need to pad rows to a multiple of 8.
342 int output_n = ((out_w + 7) & ~7) * out_h;
343 uint16_t *input_ = new uint16_t[h * stride];
344 uint16_t *input = input_ + border;
345 uint16_t *output = new uint16_t[output_n];
346 uint16_t *output2 = new uint16_t[output_n];
347 int32_t mat[8];
348 int16_t alpha, beta, gamma, delta;
349 ConvolveParams conv_params = get_conv_params(0, 0, 0, bd);
350 CONV_BUF_TYPE *dsta = new CONV_BUF_TYPE[output_n];
351 CONV_BUF_TYPE *dstb = new CONV_BUF_TYPE[output_n];
352 for (int i = 0; i < output_n; ++i) output[i] = output2[i] = rnd_.Rand16();
354 for (i = 0; i < num_iters; ++i) {
355 // Generate an input block and extend its borders horizontally
356 for (int r = 0; r < h; ++r)
357 for (int c = 0; c < w; ++c) input[r * stride + c] = rnd_.Rand16() & mask;
358 for (int r = 0; r < h; ++r) {
359 for (int c = 0; c < border; ++c) {
360 input[r * stride - border + c] = input[r * stride];
361 input[r * stride + w + c] = input[r * stride + (w - 1)];
364 const int use_no_round = rnd_.Rand8() & 1;
365 for (sub_x = 0; sub_x < 2; ++sub_x)
366 for (sub_y = 0; sub_y < 2; ++sub_y) {
367 generate_warped_model(&rnd_, mat, &alpha, &beta, &gamma, &delta);
368 for (int ii = 0; ii < 2; ++ii) {
369 for (int jj = 0; jj < 5; ++jj) {
370 for (int do_average = 0; do_average <= 1; ++do_average) {
371 if (use_no_round) {
372 conv_params = get_conv_params_no_round(0, do_average, 0, dsta,
373 out_w, 1, bd);
374 } else {
375 conv_params = get_conv_params(0, 0, 0, bd);
377 if (jj >= 4) {
378 conv_params.use_jnt_comp_avg = 0;
379 } else {
380 conv_params.use_jnt_comp_avg = 1;
381 conv_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
382 conv_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
385 av1_highbd_warp_affine_c(mat, input, w, h, stride, output, 32, 32,
386 out_w, out_h, out_w, sub_x, sub_y, bd,
387 &conv_params, alpha, beta, gamma, delta);
388 if (use_no_round) {
389 // TODO(angiebird): Change this to test_impl once we have SIMD
390 // implementation
391 conv_params = get_conv_params_no_round(0, do_average, 0, dstb,
392 out_w, 1, bd);
394 if (jj >= 4) {
395 conv_params.use_jnt_comp_avg = 0;
396 } else {
397 conv_params.use_jnt_comp_avg = 1;
398 conv_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
399 conv_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
401 test_impl(mat, input, w, h, stride, output2, 32, 32, out_w, out_h,
402 out_w, sub_x, sub_y, bd, &conv_params, alpha, beta,
403 gamma, delta);
405 if (use_no_round) {
406 for (j = 0; j < out_w * out_h; ++j)
407 ASSERT_EQ(dsta[j], dstb[j])
408 << "Pixel mismatch at index " << j << " = ("
409 << (j % out_w) << ", " << (j / out_w) << ") on iteration "
410 << i;
411 for (j = 0; j < out_w * out_h; ++j)
412 ASSERT_EQ(output[j], output2[j])
413 << "Pixel mismatch at index " << j << " = ("
414 << (j % out_w) << ", " << (j / out_w) << ") on iteration "
415 << i;
416 } else {
417 for (j = 0; j < out_w * out_h; ++j)
418 ASSERT_EQ(output[j], output2[j])
419 << "Pixel mismatch at index " << j << " = ("
420 << (j % out_w) << ", " << (j / out_w) << ") on iteration "
421 << i;
429 delete[] input_;
430 delete[] output;
431 delete[] output2;
432 delete[] dsta;
433 delete[] dstb;
435 #endif
436 } // namespace AV1HighbdWarpFilter
437 } // namespace libaom_test