av1_convolve_ x,y _avx2() -- use 256 bit load/store
[aom.git] / test / av1_highbd_iht_test.cc
blobb0358a44b54ac90452f56181c7b7d6f336fade54
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
#include <stddef.h>

#include "third_party/googletest/src/googletest/include/gtest/gtest.h"

#include "./av1_rtcd.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "av1/common/enums.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_ports/mem.h"
23 namespace {
25 using std::tr1::tuple;
26 using libaom_test::ACMRandom;
28 typedef void (*HbdHtFunc)(const int16_t *input, int32_t *output, int stride,
29 TX_TYPE tx_type, int bd);
31 typedef void (*IHbdHtFunc)(const int32_t *coeff, uint16_t *output, int stride,
32 TX_TYPE tx_type, int bd);
34 // Test parameter argument list:
35 // <transform reference function,
36 // optimized inverse transform function,
37 // inverse transform reference function,
38 // num_coeffs,
39 // tx_type,
40 // bit_depth>
41 typedef tuple<HbdHtFunc, IHbdHtFunc, IHbdHtFunc, int, TX_TYPE, int> IHbdHtParam;
43 class AV1HighbdInvHTNxN : public ::testing::TestWithParam<IHbdHtParam> {
44 public:
45 virtual ~AV1HighbdInvHTNxN() {}
47 virtual void SetUp() {
48 txfm_ref_ = GET_PARAM(0);
49 inv_txfm_ = GET_PARAM(1);
50 inv_txfm_ref_ = GET_PARAM(2);
51 num_coeffs_ = GET_PARAM(3);
52 tx_type_ = GET_PARAM(4);
53 bit_depth_ = GET_PARAM(5);
55 input_ = reinterpret_cast<int16_t *>(
56 aom_memalign(16, sizeof(input_[0]) * num_coeffs_));
58 // Note:
59 // Inverse transform input buffer is 32-byte aligned
60 // Refer to <root>/av1/encoder/context_tree.c, function,
61 // void alloc_mode_context().
62 coeffs_ = reinterpret_cast<int32_t *>(
63 aom_memalign(32, sizeof(coeffs_[0]) * num_coeffs_));
64 output_ = reinterpret_cast<uint16_t *>(
65 aom_memalign(32, sizeof(output_[0]) * num_coeffs_));
66 output_ref_ = reinterpret_cast<uint16_t *>(
67 aom_memalign(32, sizeof(output_ref_[0]) * num_coeffs_));
70 virtual void TearDown() {
71 aom_free(input_);
72 aom_free(coeffs_);
73 aom_free(output_);
74 aom_free(output_ref_);
75 libaom_test::ClearSystemState();
78 protected:
79 void RunBitexactCheck();
81 private:
82 int GetStride() const {
83 if (16 == num_coeffs_) {
84 return 4;
85 } else if (64 == num_coeffs_) {
86 return 8;
87 } else if (256 == num_coeffs_) {
88 return 16;
89 } else if (1024 == num_coeffs_) {
90 return 32;
91 } else {
92 return 0;
96 HbdHtFunc txfm_ref_;
97 IHbdHtFunc inv_txfm_;
98 IHbdHtFunc inv_txfm_ref_;
99 int num_coeffs_;
100 TX_TYPE tx_type_;
101 int bit_depth_;
103 int16_t *input_;
104 int32_t *coeffs_;
105 uint16_t *output_;
106 uint16_t *output_ref_;
109 void AV1HighbdInvHTNxN::RunBitexactCheck() {
110 ACMRandom rnd(ACMRandom::DeterministicSeed());
111 const int stride = GetStride();
112 const int num_tests = 20000;
113 const uint16_t mask = (1 << bit_depth_) - 1;
115 for (int i = 0; i < num_tests; ++i) {
116 for (int j = 0; j < num_coeffs_; ++j) {
117 input_[j] = (rnd.Rand16() & mask) - (rnd.Rand16() & mask);
118 output_ref_[j] = rnd.Rand16() & mask;
119 output_[j] = output_ref_[j];
122 txfm_ref_(input_, coeffs_, stride, tx_type_, bit_depth_);
123 inv_txfm_ref_(coeffs_, output_ref_, stride, tx_type_, bit_depth_);
124 ASM_REGISTER_STATE_CHECK(
125 inv_txfm_(coeffs_, output_, stride, tx_type_, bit_depth_));
127 for (int j = 0; j < num_coeffs_; ++j) {
128 EXPECT_EQ(output_ref_[j], output_[j])
129 << "Not bit-exact result at index: " << j << " At test block: " << i;
134 TEST_P(AV1HighbdInvHTNxN, InvTransResultCheck) { RunBitexactCheck(); }
136 using std::tr1::make_tuple;
138 #if HAVE_SSE4_1 && !(CONFIG_DAALA_TX4 && CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16)
139 #if !CONFIG_DAALA_TX4
140 #define PARAM_LIST_4X4 \
141 &av1_fwd_txfm2d_4x4_c, &av1_inv_txfm2d_add_4x4_sse4_1, \
142 &av1_inv_txfm2d_add_4x4_c, 16
143 #endif
144 #if !CONFIG_DAALA_TX8
145 #define PARAM_LIST_8X8 \
146 &av1_fwd_txfm2d_8x8_c, &av1_inv_txfm2d_add_8x8_sse4_1, \
147 &av1_inv_txfm2d_add_8x8_c, 64
148 #endif
149 #if !CONFIG_DAALA_TX16
150 #define PARAM_LIST_16X16 \
151 &av1_fwd_txfm2d_16x16_c, &av1_inv_txfm2d_add_16x16_sse4_1, \
152 &av1_inv_txfm2d_add_16x16_c, 256
153 #endif
154 const IHbdHtParam kArrayIhtParam[] = {
155 // 16x16
156 #if !CONFIG_DAALA_TX16
157 make_tuple(PARAM_LIST_16X16, DCT_DCT, 10),
158 make_tuple(PARAM_LIST_16X16, DCT_DCT, 12),
159 make_tuple(PARAM_LIST_16X16, ADST_DCT, 10),
160 make_tuple(PARAM_LIST_16X16, ADST_DCT, 12),
161 make_tuple(PARAM_LIST_16X16, DCT_ADST, 10),
162 make_tuple(PARAM_LIST_16X16, DCT_ADST, 12),
163 make_tuple(PARAM_LIST_16X16, ADST_ADST, 10),
164 make_tuple(PARAM_LIST_16X16, ADST_ADST, 12),
165 make_tuple(PARAM_LIST_16X16, FLIPADST_DCT, 10),
166 make_tuple(PARAM_LIST_16X16, FLIPADST_DCT, 12),
167 make_tuple(PARAM_LIST_16X16, DCT_FLIPADST, 10),
168 make_tuple(PARAM_LIST_16X16, DCT_FLIPADST, 12),
169 make_tuple(PARAM_LIST_16X16, FLIPADST_FLIPADST, 10),
170 make_tuple(PARAM_LIST_16X16, FLIPADST_FLIPADST, 12),
171 make_tuple(PARAM_LIST_16X16, ADST_FLIPADST, 10),
172 make_tuple(PARAM_LIST_16X16, ADST_FLIPADST, 12),
173 make_tuple(PARAM_LIST_16X16, FLIPADST_ADST, 10),
174 make_tuple(PARAM_LIST_16X16, FLIPADST_ADST, 12),
175 #endif
176 // 8x8
177 #if !CONFIG_DAALA_TX8
178 make_tuple(PARAM_LIST_8X8, DCT_DCT, 10),
179 make_tuple(PARAM_LIST_8X8, DCT_DCT, 12),
180 make_tuple(PARAM_LIST_8X8, ADST_DCT, 10),
181 make_tuple(PARAM_LIST_8X8, ADST_DCT, 12),
182 make_tuple(PARAM_LIST_8X8, DCT_ADST, 10),
183 make_tuple(PARAM_LIST_8X8, DCT_ADST, 12),
184 make_tuple(PARAM_LIST_8X8, ADST_ADST, 10),
185 make_tuple(PARAM_LIST_8X8, ADST_ADST, 12),
186 make_tuple(PARAM_LIST_8X8, FLIPADST_DCT, 10),
187 make_tuple(PARAM_LIST_8X8, FLIPADST_DCT, 12),
188 make_tuple(PARAM_LIST_8X8, DCT_FLIPADST, 10),
189 make_tuple(PARAM_LIST_8X8, DCT_FLIPADST, 12),
190 make_tuple(PARAM_LIST_8X8, FLIPADST_FLIPADST, 10),
191 make_tuple(PARAM_LIST_8X8, FLIPADST_FLIPADST, 12),
192 make_tuple(PARAM_LIST_8X8, ADST_FLIPADST, 10),
193 make_tuple(PARAM_LIST_8X8, ADST_FLIPADST, 12),
194 make_tuple(PARAM_LIST_8X8, FLIPADST_ADST, 10),
195 make_tuple(PARAM_LIST_8X8, FLIPADST_ADST, 12),
196 #endif
197 // 4x4
198 #if !CONFIG_DAALA_TX4
199 make_tuple(PARAM_LIST_4X4, DCT_DCT, 10),
200 make_tuple(PARAM_LIST_4X4, DCT_DCT, 12),
201 make_tuple(PARAM_LIST_4X4, ADST_DCT, 10),
202 make_tuple(PARAM_LIST_4X4, ADST_DCT, 12),
203 make_tuple(PARAM_LIST_4X4, DCT_ADST, 10),
204 make_tuple(PARAM_LIST_4X4, DCT_ADST, 12),
205 make_tuple(PARAM_LIST_4X4, ADST_ADST, 10),
206 make_tuple(PARAM_LIST_4X4, ADST_ADST, 12),
207 make_tuple(PARAM_LIST_4X4, FLIPADST_DCT, 10),
208 make_tuple(PARAM_LIST_4X4, FLIPADST_DCT, 12),
209 make_tuple(PARAM_LIST_4X4, DCT_FLIPADST, 10),
210 make_tuple(PARAM_LIST_4X4, DCT_FLIPADST, 12),
211 make_tuple(PARAM_LIST_4X4, FLIPADST_FLIPADST, 10),
212 make_tuple(PARAM_LIST_4X4, FLIPADST_FLIPADST, 12),
213 make_tuple(PARAM_LIST_4X4, ADST_FLIPADST, 10),
214 make_tuple(PARAM_LIST_4X4, ADST_FLIPADST, 12),
215 make_tuple(PARAM_LIST_4X4, FLIPADST_ADST, 10),
216 make_tuple(PARAM_LIST_4X4, FLIPADST_ADST, 12),
217 #endif
220 INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdInvHTNxN,
221 ::testing::ValuesIn(kArrayIhtParam));
222 #endif // HAVE_SSE4_1 &&
223 // !(CONFIG_DAALA_TX4 && CONFIG_DAALA_TX8 && CONFIG_DAALA_TX16)
// AVX2 instantiation for the 32x32 inverse transform; skipped when the Daala
// 32-point transform is configured.
#if HAVE_AVX2 && !CONFIG_DAALA_TX32
// First four tuple fields: forward reference, AVX2 inverse, C inverse,
// coefficient count.
#define PARAM_LIST_32X32                                   \
  &av1_fwd_txfm2d_32x32_c, &av1_inv_txfm2d_add_32x32_avx2, \
      &av1_inv_txfm2d_add_32x32_c, 1024

// Only DCT_DCT is listed for 32x32, at bit depths 10 and 12.
const IHbdHtParam kArrayIhtParam32x32[] = {
  // 32x32
  make_tuple(PARAM_LIST_32X32, DCT_DCT, 10),
  make_tuple(PARAM_LIST_32X32, DCT_DCT, 12),
};

INSTANTIATE_TEST_CASE_P(AVX2, AV1HighbdInvHTNxN,
                        ::testing::ValuesIn(kArrayIhtParam32x32));

#endif  // HAVE_AVX2 && !CONFIG_DAALA_TX32
240 } // namespace