av1_convolve_ x,y _avx2() -- use 256 bit load/store
[aom.git] / test / sum_squares_test.cc
blobb8701c19645a5d1266560896eba6fec2389121c8
1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
12 #include <cmath>
13 #include <cstdlib>
14 #include <string>
16 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
18 #include "./aom_config.h"
19 #include "./aom_dsp_rtcd.h"
20 #include "aom_ports/mem.h"
21 #include "test/acm_random.h"
22 #include "test/clear_system_state.h"
23 #include "test/register_state_check.h"
24 #include "test/util.h"
25 #include "test/function_equivalence_test.h"
27 using libaom_test::ACMRandom;
28 using libaom_test::FunctionEquivalenceTest;
30 namespace {
// Number of randomized rounds run by each SumSquaresTest (2D) test case.
31 const int kNumIterations = 10000;
// Largest sample magnitude used by the 1D tests: (1 << 12) - 1 == 4095.
33 static const int16_t kInt13Max = (1 << 12) - 1;
// Signature shared by the 2D sum-of-squares functions under test: takes a
// pointer to int16_t samples plus stride, width and height, returns uint64_t.
35 typedef uint64_t (*SSI16Func)(const int16_t *src, int stride, int width,
36 int height);
// Test parameter type for the 2D tests; the tests read its ref_func and
// tst_func members.
37 typedef libaom_test::FuncParam<SSI16Func> TestFuncs;
// Value-parameterized fixture for the 2D sum-of-squares tests. Each parameter
// is a TestFuncs value whose ref_func / tst_func members are called by the
// TEST_P bodies.
39 class SumSquaresTest : public ::testing::TestWithParam<TestFuncs> {
40 public:
41 virtual ~SumSquaresTest() {}
// Cache the parameter of the current instantiation before each test.
42 virtual void SetUp() { params_ = this->GetParam(); }
// Invoke libaom_test::ClearSystemState() after each test.
44 virtual void TearDown() { libaom_test::ClearSystemState(); }
46 protected:
// Function pair being compared in the current test instance.
47 TestFuncs params_;
// Compares params_.tst_func against params_.ref_func on blocks filled with
// random samples, for kNumIterations randomly sized blocks.
50 TEST_P(SumSquaresTest, OperationCheck) {
51 ACMRandom rnd(ACMRandom::DeterministicSeed());
// Local sample buffer of 256 * 256 int16_t, declared through DECLARE_ALIGNED
// with an alignment argument of 16.
52 DECLARE_ALIGNED(16, int16_t, src[256 * 256]);
// Set on the first mismatch; later iterations then skip the comparison so
// that only one failure is reported.
54 int failed = 0;
// limit == 1 << 12 == 4096; it is the argument passed to rnd() when drawing
// sample magnitudes.
56 const int msb = 11; // Up to 12 bit input
57 const int limit = 1 << (msb + 1);
59 for (int k = 0; k < kNumIterations; k++) {
// Width and height are multiples of 4.
// NOTE(review): if rnd(32) returns values in [0, 32), the largest dimension is
// 124 rather than 128 and a dimension of 0 is possible; in that case nothing
// is written to src below and both functions are called on an empty block.
// Confirm the range of ACMRandom::operator() and whether 0 is intended.
60 int width = 4 * rnd(32); // Up to 128x128
61 int height = 4 * rnd(32); // Up to 128x128
// Stride is 4 shifted left by a random amount; redraw until it is at least
// as large as width.
62 int stride = 4 << rnd(7); // Up to 256 stride
63 while (stride < width) { // Make sure it's valid
64 stride = 4 << rnd(7);
// Fill only the width x height region; the sign of each sample is picked by
// rnd(2). Elements between width and stride are not written here.
67 for (int ii = 0; ii < height; ii++) {
68 for (int jj = 0; jj < width; jj++) {
69 src[ii * stride + jj] = rnd(2) ? rnd(limit) : -rnd(limit);
// Evaluate the reference directly, and the function under test inside
// ASM_REGISTER_STATE_CHECK (presumably provided by
// test/register_state_check.h).
73 const uint64_t res_ref = params_.ref_func(src, stride, width, height);
74 uint64_t res_tst;
75 ASM_REGISTER_STATE_CHECK(res_tst =
76 params_.tst_func(src, stride, width, height));
// Compare only while no mismatch has been recorded yet.
78 if (!failed) {
79 failed = res_ref != res_tst;
80 EXPECT_EQ(res_ref, res_tst)
81 << "Error: Sum Squares Test"
82 << " C output does not match optimized output.";
// Compares params_.tst_func against params_.ref_func on blocks in which every
// sample has the same extreme value, +(limit - 1) or -(limit - 1).
87 TEST_P(SumSquaresTest, ExtremeValues) {
88 ACMRandom rnd(ACMRandom::DeterministicSeed());
// Local sample buffer of 256 * 256 int16_t, declared through DECLARE_ALIGNED
// with an alignment argument of 16.
89 DECLARE_ALIGNED(16, int16_t, src[256 * 256]);
// Set on the first mismatch; later iterations then skip the comparison so
// that only one failure is reported.
91 int failed = 0;
// limit == 1 << 12 == 4096, so the extreme sample magnitude is 4095.
93 const int msb = 11; // Up to 12 bit input
94 const int limit = 1 << (msb + 1);
96 for (int k = 0; k < kNumIterations; k++) {
// Width and height are multiples of 4.
// NOTE(review): if rnd(32) returns values in [0, 32), the largest dimension is
// 124 rather than 128 and a dimension of 0 is possible; confirm the range of
// ACMRandom::operator() and whether empty blocks are intended.
97 int width = 4 * rnd(32); // Up to 128x128
98 int height = 4 * rnd(32); // Up to 128x128
// Stride is 4 shifted left by a random amount; redraw until it is at least
// as large as width.
99 int stride = 4 << rnd(7); // Up to 256 stride
100 while (stride < width) { // Make sure it's valid
101 stride = 4 << rnd(7);
// One extreme value per iteration, sign picked by rnd(2), written to every
// sample of the width x height region.
104 int val = rnd(2) ? limit - 1 : -(limit - 1);
105 for (int ii = 0; ii < height; ii++) {
106 for (int jj = 0; jj < width; jj++) {
107 src[ii * stride + jj] = val;
// Evaluate the reference directly, and the function under test inside
// ASM_REGISTER_STATE_CHECK (presumably provided by
// test/register_state_check.h).
111 const uint64_t res_ref = params_.ref_func(src, stride, width, height);
112 uint64_t res_tst;
113 ASM_REGISTER_STATE_CHECK(res_tst =
114 params_.tst_func(src, stride, width, height));
// Compare only while no mismatch has been recorded yet.
116 if (!failed) {
117 failed = res_ref != res_tst;
118 EXPECT_EQ(res_ref, res_tst)
119 << "Error: Sum Squares Test"
120 << " C output does not match optimized output.";
// Instantiates SumSquaresTest under the "SSE2" prefix with a single TestFuncs
// parameter built from aom_sum_squares_2d_i16_c and
// aom_sum_squares_2d_i16_sse2. Compiled only when HAVE_SSE2 is non-zero.
// NOTE(review): presumably the first constructor argument becomes ref_func and
// the second tst_func -- confirm against libaom_test::FuncParam.
125 #if HAVE_SSE2
127 INSTANTIATE_TEST_CASE_P(
128 SSE2, SumSquaresTest,
129 ::testing::Values(TestFuncs(&aom_sum_squares_2d_i16_c,
130 &aom_sum_squares_2d_i16_sse2)));
132 #endif // HAVE_SSE2
134 //////////////////////////////////////////////////////////////////////////////
135 // 1D version
136 //////////////////////////////////////////////////////////////////////////////
// Signature shared by the 1D sum-of-squares functions under test: takes a
// pointer to int16_t samples and a uint32_t N, returns uint64_t. The tests
// pass the number of samples to process as N.
138 typedef uint64_t (*F1D)(const int16_t *src, uint32_t N);
// Test parameter type for the 1D tests.
139 typedef libaom_test::FuncParam<F1D> TestFuncs1D;
// Fixture for the 1D sum-of-squares tests, built on
// FunctionEquivalenceTest<F1D>. The tests use rng_ and params_, which are not
// declared here and are presumably inherited from that base class.
141 class SumSquares1DTest : public FunctionEquivalenceTest<F1D> {
142 protected:
// Number of randomized rounds per test case.
143 static const int kIterations = 1000;
// The test buffers hold kMaxSize * kMaxSize samples.
144 static const int kMaxSize = 256;
// Compares params_.tst_func against params_.ref_func on a buffer of random
// samples, using a randomly chosen length N in each iteration.
147 TEST_P(SumSquares1DTest, RandomValues) {
148 DECLARE_ALIGNED(16, int16_t, src[kMaxSize * kMaxSize]);
// Stop early once a fatal failure (the ASSERT_EQ below) has been recorded.
150 for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
// Refill the whole buffer every iteration. Assuming rng_(n) returns a value
// in [0, n), each sample lies in [-kInt13Max, kInt13Max] -- TODO confirm.
151 for (int i = 0; i < kMaxSize * kMaxSize; ++i)
152 src[i] = rng_(kInt13Max * 2 + 1) - kInt13Max;
// rng_(2) selects between a large length (at least kMaxSize) and a small one
// (at least 1). Assuming rng_(n) is in [0, n), the ranges are
// [kMaxSize, kMaxSize * kMaxSize] and [1, kMaxSize] respectively.
154 const int N = rng_(2) ? rng_(kMaxSize * kMaxSize + 1 - kMaxSize) + kMaxSize
155 : rng_(kMaxSize) + 1;
// Evaluate the reference directly, and the function under test inside
// ASM_REGISTER_STATE_CHECK.
157 const uint64_t ref_res = params_.ref_func(src, N);
158 uint64_t tst_res;
159 ASM_REGISTER_STATE_CHECK(tst_res = params_.tst_func(src, N));
161 ASSERT_EQ(ref_res, tst_res);
// Compares params_.tst_func against params_.ref_func on a buffer in which
// every sample is kInt13Max or every sample is -kInt13Max.
165 TEST_P(SumSquares1DTest, ExtremeValues) {
166 DECLARE_ALIGNED(16, int16_t, src[kMaxSize * kMaxSize]);
// Stop early once a fatal failure (the ASSERT_EQ below) has been recorded.
168 for (int iter = 0; iter < kIterations && !HasFatalFailure(); ++iter) {
// rng_(2) picks the sign; the whole buffer gets the same value.
169 if (rng_(2)) {
170 for (int i = 0; i < kMaxSize * kMaxSize; ++i) src[i] = kInt13Max;
171 } else {
172 for (int i = 0; i < kMaxSize * kMaxSize; ++i) src[i] = -kInt13Max;
// rng_(2) selects between a large length (at least kMaxSize) and a small one
// (at least 1). Assuming rng_(n) is in [0, n), the ranges are
// [kMaxSize, kMaxSize * kMaxSize] and [1, kMaxSize] respectively.
175 const int N = rng_(2) ? rng_(kMaxSize * kMaxSize + 1 - kMaxSize) + kMaxSize
176 : rng_(kMaxSize) + 1;
// Evaluate the reference directly, and the function under test inside
// ASM_REGISTER_STATE_CHECK.
178 const uint64_t ref_res = params_.ref_func(src, N);
179 uint64_t tst_res;
180 ASM_REGISTER_STATE_CHECK(tst_res = params_.tst_func(src, N));
182 ASSERT_EQ(ref_res, tst_res);
// Instantiates SumSquares1DTest under the "SSE2" prefix with a single
// TestFuncs1D parameter built from aom_sum_squares_i16_c and
// aom_sum_squares_i16_sse2. Compiled only when HAVE_SSE2 is non-zero.
// NOTE(review): presumably the first constructor argument becomes ref_func and
// the second tst_func -- confirm against libaom_test::FuncParam.
186 #if HAVE_SSE2
187 INSTANTIATE_TEST_CASE_P(SSE2, SumSquares1DTest,
188 ::testing::Values(TestFuncs1D(
189 aom_sum_squares_i16_c, aom_sum_squares_i16_sse2)));
191 #endif // HAVE_SSE2
192 } // namespace