Add ssse3 aom_smooth_h_predictor_4xh
[aom.git] / test / av1_highbd_iht_test.cc
blob59aaf046257213d6ac2d547b183aa4adac66a43a
1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
12 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
14 #include "./av1_rtcd.h"
15 #include "test/acm_random.h"
16 #include "test/clear_system_state.h"
17 #include "test/register_state_check.h"
18 #include "test/util.h"
19 #include "av1/common/enums.h"
20 #include "aom_dsp/aom_dsp_common.h"
21 #include "aom_ports/mem.h"
23 namespace {
25 using libaom_test::ACMRandom;
26 using std::tr1::tuple;
28 typedef void (*HbdHtFunc)(const int16_t *input, int32_t *output, int stride,
29 TX_TYPE tx_type, int bd);
31 typedef void (*IHbdHtFunc)(const int32_t *coeff, uint16_t *output, int stride,
32 TX_TYPE tx_type, int bd);
34 // Test parameter argument list:
35 // <transform reference function,
36 // optimized inverse transform function,
37 // inverse transform reference function,
38 // num_coeffs,
39 // tx_type,
40 // bit_depth>
41 typedef tuple<HbdHtFunc, IHbdHtFunc, IHbdHtFunc, int, TX_TYPE, int> IHbdHtParam;
43 class AV1HighbdInvHTNxN : public ::testing::TestWithParam<IHbdHtParam> {
44 public:
45 virtual ~AV1HighbdInvHTNxN() {}
47 virtual void SetUp() {
48 txfm_ref_ = GET_PARAM(0);
49 inv_txfm_ = GET_PARAM(1);
50 inv_txfm_ref_ = GET_PARAM(2);
51 num_coeffs_ = GET_PARAM(3);
52 tx_type_ = GET_PARAM(4);
53 bit_depth_ = GET_PARAM(5);
55 input_ = reinterpret_cast<int16_t *>(
56 aom_memalign(16, sizeof(input_[0]) * num_coeffs_));
58 // Note:
59 // Inverse transform input buffer is 32-byte aligned
60 // Refer to <root>/av1/encoder/context_tree.c, function,
61 // void alloc_mode_context().
62 coeffs_ = reinterpret_cast<int32_t *>(
63 aom_memalign(32, sizeof(coeffs_[0]) * num_coeffs_));
64 output_ = reinterpret_cast<uint16_t *>(
65 aom_memalign(32, sizeof(output_[0]) * num_coeffs_));
66 output_ref_ = reinterpret_cast<uint16_t *>(
67 aom_memalign(32, sizeof(output_ref_[0]) * num_coeffs_));
70 virtual void TearDown() {
71 aom_free(input_);
72 aom_free(coeffs_);
73 aom_free(output_);
74 aom_free(output_ref_);
75 libaom_test::ClearSystemState();
78 protected:
79 void RunBitexactCheck();
81 private:
82 int GetStride() const {
83 if (16 == num_coeffs_) {
84 return 4;
85 } else if (64 == num_coeffs_) {
86 return 8;
87 } else if (256 == num_coeffs_) {
88 return 16;
89 } else if (1024 == num_coeffs_) {
90 return 32;
91 } else if (4096 == num_coeffs_) {
92 return 64;
93 } else {
94 return 0;
98 HbdHtFunc txfm_ref_;
99 IHbdHtFunc inv_txfm_;
100 IHbdHtFunc inv_txfm_ref_;
101 int num_coeffs_;
102 TX_TYPE tx_type_;
103 int bit_depth_;
105 int16_t *input_;
106 int32_t *coeffs_;
107 uint16_t *output_;
108 uint16_t *output_ref_;
111 void AV1HighbdInvHTNxN::RunBitexactCheck() {
112 ACMRandom rnd(ACMRandom::DeterministicSeed());
113 const int stride = GetStride();
114 const int num_tests = 20000;
115 const uint16_t mask = (1 << bit_depth_) - 1;
117 for (int i = 0; i < num_tests; ++i) {
118 for (int j = 0; j < num_coeffs_; ++j) {
119 input_[j] = (rnd.Rand16() & mask) - (rnd.Rand16() & mask);
120 output_ref_[j] = rnd.Rand16() & mask;
121 output_[j] = output_ref_[j];
124 txfm_ref_(input_, coeffs_, stride, tx_type_, bit_depth_);
125 inv_txfm_ref_(coeffs_, output_ref_, stride, tx_type_, bit_depth_);
126 ASM_REGISTER_STATE_CHECK(
127 inv_txfm_(coeffs_, output_, stride, tx_type_, bit_depth_));
129 for (int j = 0; j < num_coeffs_; ++j) {
130 EXPECT_EQ(output_ref_[j], output_[j])
131 << "Not bit-exact result at index: " << j << " At test block: " << i;
136 TEST_P(AV1HighbdInvHTNxN, InvTransResultCheck) { RunBitexactCheck(); }
138 using std::tr1::make_tuple;
#if HAVE_SSE4_1
// Each PARAM_LIST_* macro expands to the first four tuple fields for one block
// size: forward reference, SSE4.1 inverse transform, C inverse reference, and
// the coefficient count.
#define PARAM_LIST_4X4            \
  &av1_fwd_txfm2d_4x4_c,          \
  &av1_inv_txfm2d_add_4x4_sse4_1, \
  &av1_inv_txfm2d_add_4x4_c,      \
  16
#define PARAM_LIST_8X8            \
  &av1_fwd_txfm2d_8x8_c,          \
  &av1_inv_txfm2d_add_8x8_sse4_1, \
  &av1_inv_txfm2d_add_8x8_c,      \
  64
#define PARAM_LIST_16X16            \
  &av1_fwd_txfm2d_16x16_c,          \
  &av1_inv_txfm2d_add_16x16_sse4_1, \
  &av1_inv_txfm2d_add_16x16_c,      \
  256
#define PARAM_LIST_64X64            \
  &av1_fwd_txfm2d_64x64_c,          \
  &av1_inv_txfm2d_add_64x64_sse4_1, \
  &av1_inv_txfm2d_add_64x64_c,      \
  4096

// SSE4.1 test matrix: every listed transform type at bit depths 10 and 12.
// The entry order determines the generated test indices, so keep it stable.
const IHbdHtParam kArrayIhtParam[] = {
  // 16x16
  make_tuple(PARAM_LIST_16X16, DCT_DCT, 10),
  make_tuple(PARAM_LIST_16X16, DCT_DCT, 12),
  make_tuple(PARAM_LIST_16X16, ADST_DCT, 10),
  make_tuple(PARAM_LIST_16X16, ADST_DCT, 12),
  make_tuple(PARAM_LIST_16X16, DCT_ADST, 10),
  make_tuple(PARAM_LIST_16X16, DCT_ADST, 12),
  make_tuple(PARAM_LIST_16X16, ADST_ADST, 10),
  make_tuple(PARAM_LIST_16X16, ADST_ADST, 12),
  make_tuple(PARAM_LIST_16X16, FLIPADST_DCT, 10),
  make_tuple(PARAM_LIST_16X16, FLIPADST_DCT, 12),
  make_tuple(PARAM_LIST_16X16, DCT_FLIPADST, 10),
  make_tuple(PARAM_LIST_16X16, DCT_FLIPADST, 12),
  make_tuple(PARAM_LIST_16X16, FLIPADST_FLIPADST, 10),
  make_tuple(PARAM_LIST_16X16, FLIPADST_FLIPADST, 12),
  make_tuple(PARAM_LIST_16X16, ADST_FLIPADST, 10),
  make_tuple(PARAM_LIST_16X16, ADST_FLIPADST, 12),
  make_tuple(PARAM_LIST_16X16, FLIPADST_ADST, 10),
  make_tuple(PARAM_LIST_16X16, FLIPADST_ADST, 12),
  // 8x8
  make_tuple(PARAM_LIST_8X8, DCT_DCT, 10),
  make_tuple(PARAM_LIST_8X8, DCT_DCT, 12),
  make_tuple(PARAM_LIST_8X8, ADST_DCT, 10),
  make_tuple(PARAM_LIST_8X8, ADST_DCT, 12),
  make_tuple(PARAM_LIST_8X8, DCT_ADST, 10),
  make_tuple(PARAM_LIST_8X8, DCT_ADST, 12),
  make_tuple(PARAM_LIST_8X8, ADST_ADST, 10),
  make_tuple(PARAM_LIST_8X8, ADST_ADST, 12),
  make_tuple(PARAM_LIST_8X8, FLIPADST_DCT, 10),
  make_tuple(PARAM_LIST_8X8, FLIPADST_DCT, 12),
  make_tuple(PARAM_LIST_8X8, DCT_FLIPADST, 10),
  make_tuple(PARAM_LIST_8X8, DCT_FLIPADST, 12),
  make_tuple(PARAM_LIST_8X8, FLIPADST_FLIPADST, 10),
  make_tuple(PARAM_LIST_8X8, FLIPADST_FLIPADST, 12),
  make_tuple(PARAM_LIST_8X8, ADST_FLIPADST, 10),
  make_tuple(PARAM_LIST_8X8, ADST_FLIPADST, 12),
  make_tuple(PARAM_LIST_8X8, FLIPADST_ADST, 10),
  make_tuple(PARAM_LIST_8X8, FLIPADST_ADST, 12),
  // 4x4
  make_tuple(PARAM_LIST_4X4, DCT_DCT, 10),
  make_tuple(PARAM_LIST_4X4, DCT_DCT, 12),
  make_tuple(PARAM_LIST_4X4, ADST_DCT, 10),
  make_tuple(PARAM_LIST_4X4, ADST_DCT, 12),
  make_tuple(PARAM_LIST_4X4, DCT_ADST, 10),
  make_tuple(PARAM_LIST_4X4, DCT_ADST, 12),
  make_tuple(PARAM_LIST_4X4, ADST_ADST, 10),
  make_tuple(PARAM_LIST_4X4, ADST_ADST, 12),
  make_tuple(PARAM_LIST_4X4, FLIPADST_DCT, 10),
  make_tuple(PARAM_LIST_4X4, FLIPADST_DCT, 12),
  make_tuple(PARAM_LIST_4X4, DCT_FLIPADST, 10),
  make_tuple(PARAM_LIST_4X4, DCT_FLIPADST, 12),
  make_tuple(PARAM_LIST_4X4, FLIPADST_FLIPADST, 10),
  make_tuple(PARAM_LIST_4X4, FLIPADST_FLIPADST, 12),
  make_tuple(PARAM_LIST_4X4, ADST_FLIPADST, 10),
  make_tuple(PARAM_LIST_4X4, ADST_FLIPADST, 12),
  make_tuple(PARAM_LIST_4X4, FLIPADST_ADST, 10),
  make_tuple(PARAM_LIST_4X4, FLIPADST_ADST, 12),
  // 64x64 (only DCT_DCT is listed for this size)
  make_tuple(PARAM_LIST_64X64, DCT_DCT, 10),
  make_tuple(PARAM_LIST_64X64, DCT_DCT, 12),
};

INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdInvHTNxN,
                        ::testing::ValuesIn(kArrayIhtParam));
#endif  // HAVE_SSE4_1
#if HAVE_AVX2
// First four tuple fields for the 32x32 case: forward reference, AVX2 inverse
// transform, C inverse reference, and the coefficient count.
#define PARAM_LIST_32X32          \
  &av1_fwd_txfm2d_32x32_c,        \
  &av1_inv_txfm2d_add_32x32_avx2, \
  &av1_inv_txfm2d_add_32x32_c,    \
  1024

// AVX2 test matrix: 32x32 DCT_DCT at bit depths 10 and 12.
const IHbdHtParam kArrayIhtParam32x32[] = {
  // 32x32
  make_tuple(PARAM_LIST_32X32, DCT_DCT, 10),
  make_tuple(PARAM_LIST_32X32, DCT_DCT, 12),
};

INSTANTIATE_TEST_CASE_P(AVX2, AV1HighbdInvHTNxN,
                        ::testing::ValuesIn(kArrayIhtParam32x32));

#endif  // HAVE_AVX2
235 } // namespace