hadamard: Add 4x4 test.
[aom.git] / test / convolve_test.cc
blobd5e37505098a2f8ca342c422835c951fed1ea57d
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
#include <assert.h>
#include <string.h>
#include <tuple>

#include "third_party/googletest/src/googletest/include/gtest/gtest.h"

#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"

#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/aom_filter.h"
#include "aom_mem/aom_mem.h"
#include "aom_ports/aom_timer.h"
#include "aom_ports/mem.h"
#include "av1/common/filter.h"
#include "test/acm_random.h"
#include "test/register_state_check.h"
#include "test/util.h"
30 namespace {
32 static const unsigned int kMaxDimension = MAX_SB_SIZE;
// Signature shared by every convolve kernel exercised by this test: source
// and destination pointers with their strides, the horizontal and vertical
// kernels with their strides, and the block width/height.
typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride,
                             const int16_t *filter_x, int filter_x_stride,
                             const int16_t *filter_y, int filter_y_stride,
                             int w, int h);

// Bundles the horizontal (h8_) and vertical (v8_) kernels of one
// implementation together with the bit depth they operate on.
struct ConvolveFunctions {
  ConvolveFunctions(ConvolveFunc h8, ConvolveFunc v8, int bd)
      : h8_(h8), v8_(v8), use_highbd_(bd) {}

  ConvolveFunc h8_;
  ConvolveFunc v8_;
  int use_highbd_;  // 0 if high bitdepth not used, else the actual bit depth.
};

// Test parameter: (block width, block height, functions under test).
typedef std::tuple<int, int, const ConvolveFunctions *> ConvolveParam;
// Expands to one (width, height, &convolve_fn) tuple for each block size from
// 4x4 up to 64x64. Relies on an unqualified make_tuple being visible at the
// point of expansion.
#define ALL_SIZES_64(convolve_fn)                                         \
  make_tuple(4, 4, &convolve_fn), make_tuple(8, 4, &convolve_fn),         \
      make_tuple(4, 8, &convolve_fn), make_tuple(8, 8, &convolve_fn),     \
      make_tuple(16, 8, &convolve_fn), make_tuple(8, 16, &convolve_fn),   \
      make_tuple(16, 16, &convolve_fn), make_tuple(32, 16, &convolve_fn), \
      make_tuple(16, 32, &convolve_fn), make_tuple(32, 32, &convolve_fn), \
      make_tuple(64, 32, &convolve_fn), make_tuple(32, 64, &convolve_fn), \
      make_tuple(64, 64, &convolve_fn)

// The three 128-wide/high sizes followed by everything in ALL_SIZES_64.
#define ALL_SIZES(convolve_fn)                                          \
  make_tuple(128, 64, &convolve_fn), make_tuple(64, 128, &convolve_fn), \
      make_tuple(128, 128, &convolve_fn), ALL_SIZES_64(convolve_fn)
// Reference 8-tap subpixel filter, slightly modified to fit into this test.
#define AV1_FILTER_WEIGHT 128
#define AV1_FILTER_SHIFT 7

// Clamps x to the 8-bit pixel range [0, 255].
uint8_t clip_pixel(int x) {
  if (x < 0) return 0;
  if (x > 255) return 255;
  return static_cast<uint8_t>(x);
}
69 void filter_block2d_8_c(const uint8_t *src_ptr, unsigned int src_stride,
70 const int16_t *HFilter, const int16_t *VFilter,
71 uint8_t *dst_ptr, unsigned int dst_stride,
72 unsigned int output_width, unsigned int output_height) {
73 // Between passes, we use an intermediate buffer whose height is extended to
74 // have enough horizontally filtered values as input for the vertical pass.
75 // This buffer is allocated to be big enough for the largest block type we
76 // support.
77 const int kInterp_Extend = 4;
78 const unsigned int intermediate_height =
79 (kInterp_Extend - 1) + output_height + kInterp_Extend;
80 unsigned int i, j;
82 assert(intermediate_height > 7);
84 // Size of intermediate_buffer is max_intermediate_height * filter_max_width,
85 // where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
86 // + kInterp_Extend
87 // = 3 + 16 + 4
88 // = 23
89 // and filter_max_width = 16
91 uint8_t intermediate_buffer[(kMaxDimension + 8) * kMaxDimension];
92 const int intermediate_next_stride =
93 1 - static_cast<int>(intermediate_height * output_width);
95 // Horizontal pass (src -> transposed intermediate).
96 uint8_t *output_ptr = intermediate_buffer;
97 const int src_next_row_stride = src_stride - output_width;
98 src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
99 for (i = 0; i < intermediate_height; ++i) {
100 for (j = 0; j < output_width; ++j) {
101 // Apply filter...
102 const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) +
103 (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) +
104 (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) +
105 (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) +
106 (AV1_FILTER_WEIGHT >> 1); // Rounding
108 // Normalize back to 0-255...
109 *output_ptr = clip_pixel(temp >> AV1_FILTER_SHIFT);
110 ++src_ptr;
111 output_ptr += intermediate_height;
113 src_ptr += src_next_row_stride;
114 output_ptr += intermediate_next_stride;
117 // Vertical pass (transposed intermediate -> dst).
118 src_ptr = intermediate_buffer;
119 const int dst_next_row_stride = dst_stride - output_width;
120 for (i = 0; i < output_height; ++i) {
121 for (j = 0; j < output_width; ++j) {
122 // Apply filter...
123 const int temp = (src_ptr[0] * VFilter[0]) + (src_ptr[1] * VFilter[1]) +
124 (src_ptr[2] * VFilter[2]) + (src_ptr[3] * VFilter[3]) +
125 (src_ptr[4] * VFilter[4]) + (src_ptr[5] * VFilter[5]) +
126 (src_ptr[6] * VFilter[6]) + (src_ptr[7] * VFilter[7]) +
127 (AV1_FILTER_WEIGHT >> 1); // Rounding
129 // Normalize back to 0-255...
130 *dst_ptr++ = clip_pixel(temp >> AV1_FILTER_SHIFT);
131 src_ptr += intermediate_height;
133 src_ptr += intermediate_next_stride;
134 dst_ptr += dst_next_row_stride;
// Averages src into output_ptr in place, rounding up:
//   output[y][x] = (output[y][x] + src[y][x] + 1) >> 1
// for an output_width x output_height block. src is only read.
void block2d_average_c(const uint8_t *src, unsigned int src_stride,
                       uint8_t *output_ptr, unsigned int output_stride,
                       unsigned int output_width, unsigned int output_height) {
  unsigned int i, j;
  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
    }
    output_ptr += output_stride;
  }
}
150 void filter_average_block2d_8_c(const uint8_t *src_ptr,
151 const unsigned int src_stride,
152 const int16_t *HFilter, const int16_t *VFilter,
153 uint8_t *dst_ptr, unsigned int dst_stride,
154 unsigned int output_width,
155 unsigned int output_height) {
156 uint8_t tmp[kMaxDimension * kMaxDimension];
158 assert(output_width <= kMaxDimension);
159 assert(output_height <= kMaxDimension);
160 filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, kMaxDimension,
161 output_width, output_height);
162 block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride, output_width,
163 output_height);
166 void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
167 const unsigned int src_stride,
168 const int16_t *HFilter, const int16_t *VFilter,
169 uint16_t *dst_ptr, unsigned int dst_stride,
170 unsigned int output_width,
171 unsigned int output_height, int bd) {
172 // Between passes, we use an intermediate buffer whose height is extended to
173 // have enough horizontally filtered values as input for the vertical pass.
174 // This buffer is allocated to be big enough for the largest block type we
175 // support.
176 const int kInterp_Extend = 4;
177 const unsigned int intermediate_height =
178 (kInterp_Extend - 1) + output_height + kInterp_Extend;
180 /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
181 * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
182 * + kInterp_Extend
183 * = 3 + 16 + 4
184 * = 23
185 * and filter_max_width = 16
187 uint16_t intermediate_buffer[(kMaxDimension + 8) * kMaxDimension] = { 0 };
188 const int intermediate_next_stride =
189 1 - static_cast<int>(intermediate_height * output_width);
191 // Horizontal pass (src -> transposed intermediate).
193 uint16_t *output_ptr = intermediate_buffer;
194 const int src_next_row_stride = src_stride - output_width;
195 unsigned int i, j;
196 src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
197 for (i = 0; i < intermediate_height; ++i) {
198 for (j = 0; j < output_width; ++j) {
199 // Apply filter...
200 const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) +
201 (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) +
202 (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) +
203 (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) +
204 (AV1_FILTER_WEIGHT >> 1); // Rounding
206 // Normalize back to 0-255...
207 *output_ptr = clip_pixel_highbd(temp >> AV1_FILTER_SHIFT, bd);
208 ++src_ptr;
209 output_ptr += intermediate_height;
211 src_ptr += src_next_row_stride;
212 output_ptr += intermediate_next_stride;
216 // Vertical pass (transposed intermediate -> dst).
218 const uint16_t *interm_ptr = intermediate_buffer;
219 const int dst_next_row_stride = dst_stride - output_width;
220 unsigned int i, j;
221 for (i = 0; i < output_height; ++i) {
222 for (j = 0; j < output_width; ++j) {
223 // Apply filter...
224 const int temp =
225 (interm_ptr[0] * VFilter[0]) + (interm_ptr[1] * VFilter[1]) +
226 (interm_ptr[2] * VFilter[2]) + (interm_ptr[3] * VFilter[3]) +
227 (interm_ptr[4] * VFilter[4]) + (interm_ptr[5] * VFilter[5]) +
228 (interm_ptr[6] * VFilter[6]) + (interm_ptr[7] * VFilter[7]) +
229 (AV1_FILTER_WEIGHT >> 1); // Rounding
231 // Normalize back to 0-255...
232 *dst_ptr++ = clip_pixel_highbd(temp >> AV1_FILTER_SHIFT, bd);
233 interm_ptr += intermediate_height;
235 interm_ptr += intermediate_next_stride;
236 dst_ptr += dst_next_row_stride;
// 16-bit counterpart of the 8-bit block average. Averages src into
// output_ptr in place, rounding up:
//   output[y][x] = (output[y][x] + src[y][x] + 1) >> 1
// for an output_width x output_height block. src is only read.
void highbd_block2d_average_c(const uint16_t *src, unsigned int src_stride,
                              uint16_t *output_ptr, unsigned int output_stride,
                              unsigned int output_width,
                              unsigned int output_height) {
  unsigned int i, j;
  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
    }
    output_ptr += output_stride;
  }
}
254 void highbd_filter_average_block2d_8_c(
255 const uint16_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
256 const int16_t *VFilter, uint16_t *dst_ptr, unsigned int dst_stride,
257 unsigned int output_width, unsigned int output_height, int bd) {
258 uint16_t tmp[kMaxDimension * kMaxDimension];
260 assert(output_width <= kMaxDimension);
261 assert(output_height <= kMaxDimension);
262 highbd_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp,
263 kMaxDimension, output_width, output_height, bd);
264 highbd_block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride,
265 output_width, output_height);
268 class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
269 public:
270 static void SetUpTestSuite() {
271 // Force input_ to be unaligned, output to be 16 byte aligned.
272 input_ = reinterpret_cast<uint8_t *>(
273 aom_memalign(kDataAlignment, kInputBufferSize + 1)) +
275 ASSERT_NE(input_, nullptr);
276 ref8_ = reinterpret_cast<uint8_t *>(
277 aom_memalign(kDataAlignment, kOutputStride * kMaxDimension));
278 ASSERT_NE(ref8_, nullptr);
279 output_ = reinterpret_cast<uint8_t *>(
280 aom_memalign(kDataAlignment, kOutputBufferSize));
281 ASSERT_NE(output_, nullptr);
282 output_ref_ = reinterpret_cast<uint8_t *>(
283 aom_memalign(kDataAlignment, kOutputBufferSize));
284 ASSERT_NE(output_ref_, nullptr);
285 input16_ = reinterpret_cast<uint16_t *>(aom_memalign(
286 kDataAlignment, (kInputBufferSize + 1) * sizeof(uint16_t))) +
288 ASSERT_NE(input16_, nullptr);
289 ref16_ = reinterpret_cast<uint16_t *>(aom_memalign(
290 kDataAlignment, kOutputStride * kMaxDimension * sizeof(uint16_t)));
291 ASSERT_NE(ref16_, nullptr);
292 output16_ = reinterpret_cast<uint16_t *>(
293 aom_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
294 ASSERT_NE(output16_, nullptr);
295 output16_ref_ = reinterpret_cast<uint16_t *>(
296 aom_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
297 ASSERT_NE(output16_ref_, nullptr);
300 virtual void TearDown() {}
302 static void TearDownTestSuite() {
303 aom_free(input_ - 1);
304 input_ = NULL;
305 aom_free(ref8_);
306 ref8_ = NULL;
307 aom_free(output_);
308 output_ = NULL;
309 aom_free(output_ref_);
310 output_ref_ = NULL;
311 aom_free(input16_ - 1);
312 input16_ = NULL;
313 aom_free(ref16_);
314 ref16_ = NULL;
315 aom_free(output16_);
316 output16_ = NULL;
317 aom_free(output16_ref_);
318 output16_ref_ = NULL;
321 protected:
322 static const int kDataAlignment = 16;
323 static const int kOuterBlockSize = 4 * kMaxDimension;
324 static const int kInputStride = kOuterBlockSize;
325 static const int kOutputStride = kOuterBlockSize;
326 static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize;
327 static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize;
329 int Width() const { return GET_PARAM(0); }
330 int Height() const { return GET_PARAM(1); }
331 int BorderLeft() const {
332 const int center = (kOuterBlockSize - Width()) / 2;
333 return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
335 int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }
337 bool IsIndexInBorder(int i) {
338 return (i < BorderTop() * kOuterBlockSize ||
339 i >= (BorderTop() + Height()) * kOuterBlockSize ||
340 i % kOuterBlockSize < BorderLeft() ||
341 i % kOuterBlockSize >= (BorderLeft() + Width()));
344 virtual void SetUp() {
345 UUT_ = GET_PARAM(2);
346 if (UUT_->use_highbd_ != 0)
347 mask_ = (1 << UUT_->use_highbd_) - 1;
348 else
349 mask_ = 255;
350 /* Set up guard blocks for an inner block centered in the outer block */
351 for (int i = 0; i < kOutputBufferSize; ++i) {
352 if (IsIndexInBorder(i)) {
353 output_[i] = 255;
354 output16_[i] = mask_;
355 } else {
356 output_[i] = 0;
357 output16_[i] = 0;
361 ::libaom_test::ACMRandom prng;
362 for (int i = 0; i < kInputBufferSize; ++i) {
363 if (i & 1) {
364 input_[i] = 255;
365 input16_[i] = mask_;
366 } else {
367 input_[i] = prng.Rand8Extremes();
368 input16_[i] = prng.Rand16() & mask_;
373 void SetConstantInput(int value) {
374 memset(input_, value, kInputBufferSize);
375 aom_memset16(input16_, value, kInputBufferSize);
378 void CopyOutputToRef() {
379 memcpy(output_ref_, output_, kOutputBufferSize);
380 // Copy 16-bit pixels values. The effective number of bytes is double.
381 memcpy(output16_ref_, output16_, sizeof(output16_[0]) * kOutputBufferSize);
384 void CheckGuardBlocks() {
385 for (int i = 0; i < kOutputBufferSize; ++i) {
386 if (IsIndexInBorder(i)) {
387 EXPECT_EQ(255, output_[i]);
392 uint8_t *input() const {
393 const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
394 if (UUT_->use_highbd_ == 0) {
395 return input_ + offset;
396 } else {
397 return CONVERT_TO_BYTEPTR(input16_) + offset;
401 uint8_t *output() const {
402 const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
403 if (UUT_->use_highbd_ == 0) {
404 return output_ + offset;
405 } else {
406 return CONVERT_TO_BYTEPTR(output16_) + offset;
410 uint8_t *output_ref() const {
411 const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
412 if (UUT_->use_highbd_ == 0) {
413 return output_ref_ + offset;
414 } else {
415 return CONVERT_TO_BYTEPTR(output16_ref_) + offset;
419 uint16_t lookup(uint8_t *list, int index) const {
420 if (UUT_->use_highbd_ == 0) {
421 return list[index];
422 } else {
423 return CONVERT_TO_SHORTPTR(list)[index];
427 void assign_val(uint8_t *list, int index, uint16_t val) const {
428 if (UUT_->use_highbd_ == 0) {
429 list[index] = (uint8_t)val;
430 } else {
431 CONVERT_TO_SHORTPTR(list)[index] = val;
435 void wrapper_filter_average_block2d_8_c(
436 const uint8_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
437 const int16_t *VFilter, uint8_t *dst_ptr, unsigned int dst_stride,
438 unsigned int output_width, unsigned int output_height) {
439 if (UUT_->use_highbd_ == 0) {
440 filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
441 dst_stride, output_width, output_height);
442 } else {
443 highbd_filter_average_block2d_8_c(
444 CONVERT_TO_SHORTPTR(src_ptr), src_stride, HFilter, VFilter,
445 CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, output_width, output_height,
446 UUT_->use_highbd_);
450 void wrapper_filter_block2d_8_c(
451 const uint8_t *src_ptr, unsigned int src_stride, const int16_t *HFilter,
452 const int16_t *VFilter, uint8_t *dst_ptr, unsigned int dst_stride,
453 unsigned int output_width, unsigned int output_height) {
454 if (UUT_->use_highbd_ == 0) {
455 filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
456 dst_stride, output_width, output_height);
457 } else {
458 highbd_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
459 HFilter, VFilter, CONVERT_TO_SHORTPTR(dst_ptr),
460 dst_stride, output_width, output_height,
461 UUT_->use_highbd_);
465 const ConvolveFunctions *UUT_;
466 static uint8_t *input_;
467 static uint8_t *ref8_;
468 static uint8_t *output_;
469 static uint8_t *output_ref_;
470 static uint16_t *input16_;
471 static uint16_t *ref16_;
472 static uint16_t *output16_;
473 static uint16_t *output16_ref_;
474 int mask_;
477 uint8_t *ConvolveTest::input_ = NULL;
478 uint8_t *ConvolveTest::ref8_ = NULL;
479 uint8_t *ConvolveTest::output_ = NULL;
480 uint8_t *ConvolveTest::output_ref_ = NULL;
481 uint16_t *ConvolveTest::input16_ = NULL;
482 uint16_t *ConvolveTest::ref16_ = NULL;
483 uint16_t *ConvolveTest::output16_ = NULL;
484 uint16_t *ConvolveTest::output16_ref_ = NULL;
486 TEST_P(ConvolveTest, GuardBlocks) { CheckGuardBlocks(); }
488 const int kNumFilterBanks = SWITCHABLE_FILTERS;
489 const int kNumFilters = 16;
491 TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
492 int subpel_search;
493 for (subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS;
494 ++subpel_search) {
495 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
496 const InterpFilter filter = (InterpFilter)filter_bank;
497 const InterpKernel *filters =
498 (const InterpKernel *)av1_get_interp_filter_kernel(filter,
499 subpel_search);
500 for (int i = 0; i < kNumFilters; i++) {
501 const int p0 = filters[i][0] + filters[i][1];
502 const int p1 = filters[i][2] + filters[i][3];
503 const int p2 = filters[i][4] + filters[i][5];
504 const int p3 = filters[i][6] + filters[i][7];
505 EXPECT_LE(p0, 128);
506 EXPECT_LE(p1, 128);
507 EXPECT_LE(p2, 128);
508 EXPECT_LE(p3, 128);
509 EXPECT_LE(p0 + p3, 128);
510 EXPECT_LE(p0 + p3 + p1, 128);
511 EXPECT_LE(p0 + p3 + p1 + p2, 128);
512 EXPECT_EQ(p0 + p1 + p2 + p3, 128);
// All-zero kernel passed for the axis that a 1-D kernel under test is
// expected to ignore.
const int16_t kInvalidFilter[8] = { 0 };
520 TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
521 uint8_t *const in = input();
522 uint8_t *const out = output();
523 uint8_t *ref;
524 if (UUT_->use_highbd_ == 0) {
525 ref = ref8_;
526 } else {
527 ref = CONVERT_TO_BYTEPTR(ref16_);
529 int subpel_search;
530 for (subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS;
531 ++subpel_search) {
532 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
533 const InterpFilter filter = (InterpFilter)filter_bank;
534 const InterpKernel *filters =
535 (const InterpKernel *)av1_get_interp_filter_kernel(filter,
536 subpel_search);
537 for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
538 for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
539 wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
540 filters[filter_y], ref, kOutputStride,
541 Width(), Height());
543 if (filter_x && filter_y)
544 continue;
545 else if (filter_y)
546 API_REGISTER_STATE_CHECK(
547 UUT_->v8_(in, kInputStride, out, kOutputStride, kInvalidFilter,
548 16, filters[filter_y], 16, Width(), Height()));
549 else if (filter_x)
550 API_REGISTER_STATE_CHECK(UUT_->h8_(
551 in, kInputStride, out, kOutputStride, filters[filter_x], 16,
552 kInvalidFilter, 16, Width(), Height()));
553 else
554 continue;
556 CheckGuardBlocks();
558 for (int y = 0; y < Height(); ++y)
559 for (int x = 0; x < Width(); ++x)
560 ASSERT_EQ(lookup(ref, y * kOutputStride + x),
561 lookup(out, y * kOutputStride + x))
562 << "mismatch at (" << x << "," << y << "), "
563 << "filters (" << filter_bank << "," << filter_x << ","
564 << filter_y << ")";
571 TEST_P(ConvolveTest, FilterExtremes) {
572 uint8_t *const in = input();
573 uint8_t *const out = output();
574 uint8_t *ref;
575 if (UUT_->use_highbd_ == 0) {
576 ref = ref8_;
577 } else {
578 ref = CONVERT_TO_BYTEPTR(ref16_);
581 // Populate ref and out with some random data
582 ::libaom_test::ACMRandom prng;
583 for (int y = 0; y < Height(); ++y) {
584 for (int x = 0; x < Width(); ++x) {
585 uint16_t r;
586 if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
587 r = prng.Rand8Extremes();
588 } else {
589 r = prng.Rand16() & mask_;
591 assign_val(out, y * kOutputStride + x, r);
592 assign_val(ref, y * kOutputStride + x, r);
596 for (int axis = 0; axis < 2; axis++) {
597 int seed_val = 0;
598 while (seed_val < 256) {
599 for (int y = 0; y < 8; ++y) {
600 for (int x = 0; x < 8; ++x) {
601 assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
602 ((seed_val >> (axis ? y : x)) & 1) * mask_);
603 if (axis) seed_val++;
605 if (axis)
606 seed_val -= 8;
607 else
608 seed_val++;
610 if (axis) seed_val += 8;
611 int subpel_search;
612 for (subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS;
613 ++subpel_search) {
614 for (int filter_bank = 0; filter_bank < kNumFilterBanks;
615 ++filter_bank) {
616 const InterpFilter filter = (InterpFilter)filter_bank;
617 const InterpKernel *filters =
618 (const InterpKernel *)av1_get_interp_filter_kernel(filter,
619 subpel_search);
620 for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
621 for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
622 wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
623 filters[filter_y], ref, kOutputStride,
624 Width(), Height());
625 if (filter_x && filter_y)
626 continue;
627 else if (filter_y)
628 API_REGISTER_STATE_CHECK(UUT_->v8_(
629 in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
630 filters[filter_y], 16, Width(), Height()));
631 else if (filter_x)
632 API_REGISTER_STATE_CHECK(UUT_->h8_(
633 in, kInputStride, out, kOutputStride, filters[filter_x], 16,
634 kInvalidFilter, 16, Width(), Height()));
635 else
636 continue;
638 for (int y = 0; y < Height(); ++y)
639 for (int x = 0; x < Width(); ++x)
640 ASSERT_EQ(lookup(ref, y * kOutputStride + x),
641 lookup(out, y * kOutputStride + x))
642 << "mismatch at (" << x << "," << y << "), "
643 << "filters (" << filter_bank << "," << filter_x << ","
644 << filter_y << ")";
653 TEST_P(ConvolveTest, DISABLED_Speed) {
654 uint8_t *const in = input();
655 uint8_t *const out = output();
656 uint8_t *ref;
657 if (UUT_->use_highbd_ == 0) {
658 ref = ref8_;
659 } else {
660 ref = CONVERT_TO_BYTEPTR(ref16_);
663 // Populate ref and out with some random data
664 ::libaom_test::ACMRandom prng;
665 for (int y = 0; y < Height(); ++y) {
666 for (int x = 0; x < Width(); ++x) {
667 uint16_t r;
668 if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
669 r = prng.Rand8Extremes();
670 } else {
671 r = prng.Rand16() & mask_;
673 assign_val(out, y * kOutputStride + x, r);
674 assign_val(ref, y * kOutputStride + x, r);
678 const InterpFilter filter = (InterpFilter)1;
679 const InterpKernel *filters =
680 (const InterpKernel *)av1_get_interp_filter_kernel(filter, USE_8_TAPS);
681 wrapper_filter_average_block2d_8_c(in, kInputStride, filters[1], filters[1],
682 out, kOutputStride, Width(), Height());
684 aom_usec_timer timer;
685 int tests_num = 1000;
687 aom_usec_timer_start(&timer);
688 while (tests_num > 0) {
689 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
690 const InterpFilter filter = (InterpFilter)filter_bank;
691 const InterpKernel *filters =
692 (const InterpKernel *)av1_get_interp_filter_kernel(filter,
693 USE_8_TAPS);
694 for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
695 for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
696 if (filter_x && filter_y) continue;
697 if (filter_y)
698 API_REGISTER_STATE_CHECK(
699 UUT_->v8_(in, kInputStride, out, kOutputStride, kInvalidFilter,
700 16, filters[filter_y], 16, Width(), Height()));
701 else if (filter_x)
702 API_REGISTER_STATE_CHECK(UUT_->h8_(
703 in, kInputStride, out, kOutputStride, filters[filter_x], 16,
704 kInvalidFilter, 16, Width(), Height()));
708 tests_num--;
710 aom_usec_timer_mark(&timer);
712 const int elapsed_time =
713 static_cast<int>(aom_usec_timer_elapsed(&timer) / 1000);
714 printf("%dx%d (bitdepth %d) time: %5d ms\n", Width(), Height(),
715 UUT_->use_highbd_, elapsed_time);
using std::make_tuple;

// WRAP macro is only used for high bitdepth build.
// WRAP(func, bd) defines wrap_<func>_<bd>, a function with the ConvolveFunc
// signature that forwards to aom_highbd_<func> with the bit depth fixed to bd.
#if CONFIG_AV1_HIGHBITDEPTH
#define WRAP(func, bd)                                                       \
  static void wrap_##func##_##bd(                                            \
      const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,                \
      ptrdiff_t dst_stride, const int16_t *filter_x, int filter_x_stride,    \
      const int16_t *filter_y, int filter_y_stride, int w, int h) {          \
    aom_highbd_##func(src, src_stride, dst, dst_stride, filter_x,            \
                      filter_x_stride, filter_y, filter_y_stride, w, h, bd); \
  }
#if HAVE_SSE2 && ARCH_X86_64
WRAP(convolve8_horiz_sse2, 8)
WRAP(convolve8_vert_sse2, 8)
WRAP(convolve8_horiz_sse2, 10)
WRAP(convolve8_vert_sse2, 10)
WRAP(convolve8_horiz_sse2, 12)
WRAP(convolve8_vert_sse2, 12)
#endif  // HAVE_SSE2 && ARCH_X86_64

WRAP(convolve8_horiz_c, 8)
WRAP(convolve8_vert_c, 8)
WRAP(convolve8_horiz_c, 10)
WRAP(convolve8_vert_c, 10)
WRAP(convolve8_horiz_c, 12)
WRAP(convolve8_vert_c, 12)

#if HAVE_AVX2
WRAP(convolve8_horiz_avx2, 8)
WRAP(convolve8_vert_avx2, 8)

WRAP(convolve8_horiz_avx2, 10)
WRAP(convolve8_vert_avx2, 10)

WRAP(convolve8_horiz_avx2, 12)
WRAP(convolve8_vert_avx2, 12)
#endif  // HAVE_AVX2
#endif  // CONFIG_AV1_HIGHBITDEPTH

#undef WRAP
760 #if CONFIG_AV1_HIGHBITDEPTH
761 const ConvolveFunctions wrap_convolve8_c(wrap_convolve8_horiz_c_8,
762 wrap_convolve8_vert_c_8, 8);
763 const ConvolveFunctions wrap_convolve10_c(wrap_convolve8_horiz_c_10,
764 wrap_convolve8_vert_c_10, 10);
765 const ConvolveFunctions wrap_convolve12_c(wrap_convolve8_horiz_c_12,
766 wrap_convolve8_vert_c_12, 12);
767 const ConvolveParam kArrayConvolve_c[] = { ALL_SIZES(wrap_convolve8_c),
768 ALL_SIZES(wrap_convolve10_c),
769 ALL_SIZES(wrap_convolve12_c) };
770 #else
771 const ConvolveFunctions convolve8_c(aom_convolve8_horiz_c, aom_convolve8_vert_c,
773 const ConvolveParam kArrayConvolve_c[] = { ALL_SIZES(convolve8_c) };
774 #endif
776 INSTANTIATE_TEST_SUITE_P(C, ConvolveTest,
777 ::testing::ValuesIn(kArrayConvolve_c));
// SSE2 implementations (x86-64 only): high-bitdepth wrappers when high
// bitdepth is enabled, otherwise the plain 8-bit functions.
#if HAVE_SSE2 && ARCH_X86_64
#if CONFIG_AV1_HIGHBITDEPTH
const ConvolveFunctions wrap_convolve8_sse2(wrap_convolve8_horiz_sse2_8,
                                            wrap_convolve8_vert_sse2_8, 8);
const ConvolveFunctions wrap_convolve10_sse2(wrap_convolve8_horiz_sse2_10,
                                             wrap_convolve8_vert_sse2_10, 10);
const ConvolveFunctions wrap_convolve12_sse2(wrap_convolve8_horiz_sse2_12,
                                             wrap_convolve8_vert_sse2_12, 12);
const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(wrap_convolve8_sse2),
                                              ALL_SIZES(wrap_convolve10_sse2),
                                              ALL_SIZES(wrap_convolve12_sse2) };
#else
const ConvolveFunctions convolve8_sse2(aom_convolve8_horiz_sse2,
                                       aom_convolve8_vert_sse2, 0);
const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(convolve8_sse2) };
#endif
INSTANTIATE_TEST_SUITE_P(SSE2, ConvolveTest,
                         ::testing::ValuesIn(kArrayConvolve_sse2));
#endif
// SSSE3 implementations: 8-bit functions only (bit depth 0).
#if HAVE_SSSE3
const ConvolveFunctions convolve8_ssse3(aom_convolve8_horiz_ssse3,
                                        aom_convolve8_vert_ssse3, 0);

const ConvolveParam kArrayConvolve8_ssse3[] = { ALL_SIZES(convolve8_ssse3) };
INSTANTIATE_TEST_SUITE_P(SSSE3, ConvolveTest,
                         ::testing::ValuesIn(kArrayConvolve8_ssse3));
#endif
// AVX2 implementations. The high-bitdepth wrappers are instantiated only for
// sizes up to 64x64 (ALL_SIZES_64); the plain 8-bit functions use every size.
#if HAVE_AVX2
#if CONFIG_AV1_HIGHBITDEPTH
const ConvolveFunctions wrap_convolve8_avx2(wrap_convolve8_horiz_avx2_8,
                                            wrap_convolve8_vert_avx2_8, 8);
const ConvolveFunctions wrap_convolve10_avx2(wrap_convolve8_horiz_avx2_10,
                                             wrap_convolve8_vert_avx2_10, 10);
const ConvolveFunctions wrap_convolve12_avx2(wrap_convolve8_horiz_avx2_12,
                                             wrap_convolve8_vert_avx2_12, 12);
const ConvolveParam kArray_Convolve8_avx2[] = {
  ALL_SIZES_64(wrap_convolve8_avx2), ALL_SIZES_64(wrap_convolve10_avx2),
  ALL_SIZES_64(wrap_convolve12_avx2)
};
#else
const ConvolveFunctions convolve8_avx2(aom_convolve8_horiz_avx2,
                                       aom_convolve8_vert_avx2, 0);
const ConvolveParam kArray_Convolve8_avx2[] = { ALL_SIZES(convolve8_avx2) };
#endif

INSTANTIATE_TEST_SUITE_P(AVX2, ConvolveTest,
                         ::testing::ValuesIn(kArray_Convolve8_avx2));
#endif  // HAVE_AVX2
830 } // namespace