av1_convolve_ x,y _avx2() -- use 256 bit load/store
[aom.git] / test / av1_fht32x32_test.cc
blob2f654957ed73af600495a6f58b4dc9fe6ad2e640
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
12 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
14 #include "./av1_rtcd.h"
15 #include "./aom_dsp_rtcd.h"
17 #include "test/acm_random.h"
18 #include "test/clear_system_state.h"
19 #include "test/register_state_check.h"
20 #include "test/transform_test_base.h"
21 #include "test/util.h"
22 #include "aom_ports/mem.h"
24 using libaom_test::ACMRandom;
26 namespace {
27 typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
28 const TxfmParam *txfm_param);
29 using std::tr1::tuple;
30 using libaom_test::FhtFunc;
31 typedef tuple<FhtFunc, IhtFunc, TX_TYPE, aom_bit_depth_t, int> Ht32x32Param;
33 void fht32x32_ref(const int16_t *in, tran_low_t *out, int stride,
34 TxfmParam *txfm_param) {
35 av1_fht32x32_c(in, out, stride, txfm_param);
38 typedef void (*IHbdHtFunc)(const tran_low_t *in, uint8_t *out, int stride,
39 TX_TYPE tx_type, int bd);
40 typedef void (*HbdHtFunc)(const int16_t *input, int32_t *output, int stride,
41 TX_TYPE tx_type, int bd);
43 // Target optimized function, tx_type, bit depth
44 typedef tuple<HbdHtFunc, TX_TYPE, int> HighbdHt32x32Param;
46 void highbd_fht32x32_ref(const int16_t *in, int32_t *out, int stride,
47 TX_TYPE tx_type, int bd) {
48 av1_fwd_txfm2d_32x32_c(in, out, stride, tx_type, bd);
#if (HAVE_SSE2 || HAVE_AVX2) && !CONFIG_DAALA_TX32
// Placeholder with the IhtFunc signature that ignores all of its arguments and
// does nothing. It fills the inverse-transform slot of the SSE2/AVX2 parameter
// tables, which only supply a forward transform.
void dummy_inv_txfm(const tran_low_t * /*in*/, uint8_t * /*out*/,
                    int /*stride*/, const TxfmParam * /*txfm_param*/) {}
#endif
61 class AV1Trans32x32HT : public libaom_test::TransformTestBase,
62 public ::testing::TestWithParam<Ht32x32Param> {
63 public:
64 virtual ~AV1Trans32x32HT() {}
66 virtual void SetUp() {
67 fwd_txfm_ = GET_PARAM(0);
68 inv_txfm_ = GET_PARAM(1);
69 pitch_ = 32;
70 height_ = 32;
71 fwd_txfm_ref = fht32x32_ref;
72 bit_depth_ = GET_PARAM(3);
73 mask_ = (1 << bit_depth_) - 1;
74 num_coeffs_ = GET_PARAM(4);
75 txfm_param_.tx_type = GET_PARAM(2);
77 virtual void TearDown() { libaom_test::ClearSystemState(); }
79 protected:
80 void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
81 fwd_txfm_(in, out, stride, &txfm_param_);
84 void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
85 inv_txfm_(out, dst, stride, &txfm_param_);
88 FhtFunc fwd_txfm_;
89 IhtFunc inv_txfm_;
92 TEST_P(AV1Trans32x32HT, CoeffCheck) { RunCoeffCheck(); }
93 TEST_P(AV1Trans32x32HT, MemCheck) { RunMemCheck(); }
95 class AV1HighbdTrans32x32HT
96 : public ::testing::TestWithParam<HighbdHt32x32Param> {
97 public:
98 virtual ~AV1HighbdTrans32x32HT() {}
100 virtual void SetUp() {
101 fwd_txfm_ = GET_PARAM(0);
102 fwd_txfm_ref_ = highbd_fht32x32_ref;
103 tx_type_ = GET_PARAM(1);
104 bit_depth_ = GET_PARAM(2);
105 mask_ = (1 << bit_depth_) - 1;
106 num_coeffs_ = 1024;
108 input_ = reinterpret_cast<int16_t *>(
109 aom_memalign(32, sizeof(int16_t) * num_coeffs_));
110 output_ = reinterpret_cast<int32_t *>(
111 aom_memalign(32, sizeof(int32_t) * num_coeffs_));
112 output_ref_ = reinterpret_cast<int32_t *>(
113 aom_memalign(32, sizeof(int32_t) * num_coeffs_));
116 virtual void TearDown() {
117 aom_free(input_);
118 aom_free(output_);
119 aom_free(output_ref_);
120 libaom_test::ClearSystemState();
123 protected:
124 void RunBitexactCheck();
126 private:
127 HbdHtFunc fwd_txfm_;
128 HbdHtFunc fwd_txfm_ref_;
129 TX_TYPE tx_type_;
130 int bit_depth_;
131 int mask_;
132 int num_coeffs_;
133 int16_t *input_;
134 int32_t *output_;
135 int32_t *output_ref_;
138 void AV1HighbdTrans32x32HT::RunBitexactCheck() {
139 ACMRandom rnd(ACMRandom::DeterministicSeed());
140 int i, j;
141 const int stride = 32;
142 const int num_tests = 1000;
144 for (i = 0; i < num_tests; ++i) {
145 for (j = 0; j < num_coeffs_; ++j) {
146 input_[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
149 fwd_txfm_ref_(input_, output_ref_, stride, tx_type_, bit_depth_);
150 ASM_REGISTER_STATE_CHECK(
151 fwd_txfm_(input_, output_, stride, tx_type_, bit_depth_));
153 for (j = 0; j < num_coeffs_; ++j) {
154 EXPECT_EQ(output_ref_[j], output_[j])
155 << "Not bit-exact result at index: " << j << " at test block: " << i;
160 TEST_P(AV1HighbdTrans32x32HT, HighbdCoeffCheck) { RunBitexactCheck(); }
162 using std::tr1::make_tuple;
#if HAVE_SSE2 && !CONFIG_DAALA_TX32
// Parameter table for av1_fht32x32_sse2: one entry per transform type, all at
// AOM_BITS_8 with 1024 coefficients. dummy_inv_txfm fills the inverse slot.
const Ht32x32Param kArrayHt32x32Param_sse2[] = {
  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm,
             DCT_DCT, AOM_BITS_8, 1024),
  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm,
             ADST_DCT, AOM_BITS_8, 1024),
  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm,
             DCT_ADST, AOM_BITS_8, 1024),
  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm,
             ADST_ADST, AOM_BITS_8, 1024),
  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm,
             FLIPADST_DCT, AOM_BITS_8, 1024),
  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm,
             DCT_FLIPADST, AOM_BITS_8, 1024),
  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm,
             FLIPADST_FLIPADST, AOM_BITS_8, 1024),
  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm,
             ADST_FLIPADST, AOM_BITS_8, 1024),
  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm,
             FLIPADST_ADST, AOM_BITS_8, 1024),
  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm,
             IDTX, AOM_BITS_8, 1024),
  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm,
             V_DCT, AOM_BITS_8, 1024),
  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm,
             H_DCT, AOM_BITS_8, 1024),
  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm,
             V_ADST, AOM_BITS_8, 1024),
  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm,
             H_ADST, AOM_BITS_8, 1024),
  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm,
             V_FLIPADST, AOM_BITS_8, 1024),
  make_tuple(&av1_fht32x32_sse2, &dummy_inv_txfm,
             H_FLIPADST, AOM_BITS_8, 1024)
};
INSTANTIATE_TEST_CASE_P(SSE2, AV1Trans32x32HT,
                        ::testing::ValuesIn(kArrayHt32x32Param_sse2));
#endif  // HAVE_SSE2 && !CONFIG_DAALA_TX32
#if HAVE_AVX2 && !CONFIG_DAALA_TX32
// Parameter table for av1_fht32x32_avx2: one entry per transform type, all at
// AOM_BITS_8 with 1024 coefficients. dummy_inv_txfm fills the inverse slot.
const Ht32x32Param kArrayHt32x32Param_avx2[] = {
  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm,
             DCT_DCT, AOM_BITS_8, 1024),
  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm,
             ADST_DCT, AOM_BITS_8, 1024),
  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm,
             DCT_ADST, AOM_BITS_8, 1024),
  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm,
             ADST_ADST, AOM_BITS_8, 1024),
  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm,
             FLIPADST_DCT, AOM_BITS_8, 1024),
  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm,
             DCT_FLIPADST, AOM_BITS_8, 1024),
  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm,
             FLIPADST_FLIPADST, AOM_BITS_8, 1024),
  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm,
             ADST_FLIPADST, AOM_BITS_8, 1024),
  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm,
             FLIPADST_ADST, AOM_BITS_8, 1024),
  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm,
             IDTX, AOM_BITS_8, 1024),
  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm,
             V_DCT, AOM_BITS_8, 1024),
  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm,
             H_DCT, AOM_BITS_8, 1024),
  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm,
             V_ADST, AOM_BITS_8, 1024),
  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm,
             H_ADST, AOM_BITS_8, 1024),
  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm,
             V_FLIPADST, AOM_BITS_8, 1024),
  make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm,
             H_FLIPADST, AOM_BITS_8, 1024)
};
INSTANTIATE_TEST_CASE_P(AVX2, AV1Trans32x32HT,
                        ::testing::ValuesIn(kArrayHt32x32Param_avx2));
#endif  // HAVE_AVX2 && !CONFIG_DAALA_TX32
219 } // namespace