av1_convolve_ x,y _avx2() -- use 256 bit load/store
[aom.git] / test / transform_test_base.h
blob16d003da0844d25bd6b38809f93885ad378ad83c
1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
12 #ifndef TEST_TRANSFORM_TEST_BASE_H_
13 #define TEST_TRANSFORM_TEST_BASE_H_
15 #include "./aom_config.h"
16 #include "aom_mem/aom_mem.h"
17 #include "aom/aom_codec.h"
18 #include "aom_dsp/txfm_common.h"
20 namespace libaom_test {
// Note:
// The same constant is also defined in av1/common/av1_entropy.h and
// av1/common/entropy.h. This base class is intended to be reusable for
// transform tests of future codecs, and including either of those headers
// would cause a compile error when unit-testing another codec, so the value
// is repeated here. Suggestion: move the definition into a shared aom header.
const int kDctMaxValue = 1 << 14;  // 16384
31 typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
32 TxfmParam *txfm_param);
34 typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
35 const TxfmParam *txfm_param);
37 class TransformTestBase {
38 public:
39 virtual ~TransformTestBase() {}
41 protected:
42 virtual void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) = 0;
44 virtual void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) = 0;
46 void RunAccuracyCheck(uint32_t ref_max_error, double ref_avg_error) {
47 ACMRandom rnd(ACMRandom::DeterministicSeed());
48 uint32_t max_error = 0;
49 int64_t total_error = 0;
50 const int count_test_block = 10000;
52 int16_t *test_input_block = reinterpret_cast<int16_t *>(
53 aom_memalign(16, sizeof(int16_t) * num_coeffs_));
54 tran_low_t *test_temp_block = reinterpret_cast<tran_low_t *>(
55 aom_memalign(16, sizeof(tran_low_t) * num_coeffs_));
56 uint8_t *dst = reinterpret_cast<uint8_t *>(
57 aom_memalign(16, sizeof(uint8_t) * num_coeffs_));
58 uint8_t *src = reinterpret_cast<uint8_t *>(
59 aom_memalign(16, sizeof(uint8_t) * num_coeffs_));
60 uint16_t *dst16 = reinterpret_cast<uint16_t *>(
61 aom_memalign(16, sizeof(uint16_t) * num_coeffs_));
62 uint16_t *src16 = reinterpret_cast<uint16_t *>(
63 aom_memalign(16, sizeof(uint16_t) * num_coeffs_));
65 for (int i = 0; i < count_test_block; ++i) {
66 // Initialize a test block with input range [-255, 255].
67 for (int j = 0; j < num_coeffs_; ++j) {
68 if (bit_depth_ == AOM_BITS_8) {
69 src[j] = rnd.Rand8();
70 dst[j] = rnd.Rand8();
71 test_input_block[j] = src[j] - dst[j];
72 } else {
73 src16[j] = rnd.Rand16() & mask_;
74 dst16[j] = rnd.Rand16() & mask_;
75 test_input_block[j] = src16[j] - dst16[j];
79 ASM_REGISTER_STATE_CHECK(
80 RunFwdTxfm(test_input_block, test_temp_block, pitch_));
81 if (bit_depth_ == AOM_BITS_8) {
82 ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
83 } else {
84 ASM_REGISTER_STATE_CHECK(
85 RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
88 for (int j = 0; j < num_coeffs_; ++j) {
89 const int diff =
90 bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
91 const uint32_t error = diff * diff;
92 if (max_error < error) max_error = error;
93 total_error += error;
97 double avg_error = total_error * 1. / count_test_block / num_coeffs_;
99 EXPECT_GE(ref_max_error, max_error)
100 << "Error: FHT/IHT has an individual round trip error > "
101 << ref_max_error;
103 EXPECT_GE(ref_avg_error, avg_error)
104 << "Error: FHT/IHT has average round trip error > " << ref_avg_error
105 << " per block";
107 aom_free(test_input_block);
108 aom_free(test_temp_block);
109 aom_free(dst);
110 aom_free(src);
111 aom_free(dst16);
112 aom_free(src16);
115 void RunCoeffCheck() {
116 ACMRandom rnd(ACMRandom::DeterministicSeed());
117 const int count_test_block = 5000;
119 // Use a stride value which is not the width of any transform, to catch
120 // cases where the transforms use the stride incorrectly.
121 int stride = 96;
123 int16_t *input_block = reinterpret_cast<int16_t *>(
124 aom_memalign(16, sizeof(int16_t) * stride * height_));
125 tran_low_t *output_ref_block = reinterpret_cast<tran_low_t *>(
126 aom_memalign(16, sizeof(tran_low_t) * num_coeffs_));
127 tran_low_t *output_block = reinterpret_cast<tran_low_t *>(
128 aom_memalign(16, sizeof(tran_low_t) * num_coeffs_));
130 for (int i = 0; i < count_test_block; ++i) {
131 int j, k;
132 for (j = 0; j < height_; ++j) {
133 for (k = 0; k < pitch_; ++k) {
134 int in_idx = j * stride + k;
135 int out_idx = j * pitch_ + k;
136 input_block[in_idx] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
137 if (bit_depth_ == AOM_BITS_8) {
138 output_block[out_idx] = output_ref_block[out_idx] = rnd.Rand8();
139 } else {
140 output_block[out_idx] = output_ref_block[out_idx] =
141 rnd.Rand16() & mask_;
146 fwd_txfm_ref(input_block, output_ref_block, stride, &txfm_param_);
147 ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, stride));
149 // The minimum quant value is 4.
150 for (j = 0; j < height_; ++j) {
151 for (k = 0; k < pitch_; ++k) {
152 int out_idx = j * pitch_ + k;
153 ASSERT_EQ(output_block[out_idx], output_ref_block[out_idx])
154 << "Error: not bit-exact result at index: " << out_idx
155 << " at test block: " << i;
159 aom_free(input_block);
160 aom_free(output_ref_block);
161 aom_free(output_block);
164 void RunInvCoeffCheck() {
165 ACMRandom rnd(ACMRandom::DeterministicSeed());
166 const int count_test_block = 5000;
168 // Use a stride value which is not the width of any transform, to catch
169 // cases where the transforms use the stride incorrectly.
170 int stride = 96;
172 int16_t *input_block = reinterpret_cast<int16_t *>(
173 aom_memalign(16, sizeof(int16_t) * num_coeffs_));
174 tran_low_t *trans_block = reinterpret_cast<tran_low_t *>(
175 aom_memalign(16, sizeof(tran_low_t) * num_coeffs_));
176 uint8_t *output_block = reinterpret_cast<uint8_t *>(
177 aom_memalign(16, sizeof(uint8_t) * stride * height_));
178 uint8_t *output_ref_block = reinterpret_cast<uint8_t *>(
179 aom_memalign(16, sizeof(uint8_t) * stride * height_));
181 for (int i = 0; i < count_test_block; ++i) {
182 // Initialize a test block with input range [-mask_, mask_].
183 int j, k;
184 for (j = 0; j < height_; ++j) {
185 for (k = 0; k < pitch_; ++k) {
186 int in_idx = j * pitch_ + k;
187 int out_idx = j * stride + k;
188 input_block[in_idx] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
189 output_ref_block[out_idx] = rnd.Rand16() & mask_;
190 output_block[out_idx] = output_ref_block[out_idx];
194 fwd_txfm_ref(input_block, trans_block, pitch_, &txfm_param_);
196 inv_txfm_ref(trans_block, output_ref_block, stride, &txfm_param_);
197 ASM_REGISTER_STATE_CHECK(RunInvTxfm(trans_block, output_block, stride));
199 for (j = 0; j < height_; ++j) {
200 for (k = 0; k < pitch_; ++k) {
201 int out_idx = j * stride + k;
202 ASSERT_EQ(output_block[out_idx], output_ref_block[out_idx])
203 << "Error: not bit-exact result at index: " << out_idx
204 << " j = " << j << " k = " << k << " at test block: " << i;
208 aom_free(input_block);
209 aom_free(trans_block);
210 aom_free(output_ref_block);
211 aom_free(output_block);
214 void RunMemCheck() {
215 ACMRandom rnd(ACMRandom::DeterministicSeed());
216 const int count_test_block = 5000;
218 int16_t *input_extreme_block = reinterpret_cast<int16_t *>(
219 aom_memalign(16, sizeof(int16_t) * num_coeffs_));
220 tran_low_t *output_ref_block = reinterpret_cast<tran_low_t *>(
221 aom_memalign(16, sizeof(tran_low_t) * num_coeffs_));
222 tran_low_t *output_block = reinterpret_cast<tran_low_t *>(
223 aom_memalign(16, sizeof(tran_low_t) * num_coeffs_));
225 for (int i = 0; i < count_test_block; ++i) {
226 // Initialize a test block with input range [-mask_, mask_].
227 for (int j = 0; j < num_coeffs_; ++j) {
228 input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
230 if (i == 0) {
231 for (int j = 0; j < num_coeffs_; ++j) input_extreme_block[j] = mask_;
232 } else if (i == 1) {
233 for (int j = 0; j < num_coeffs_; ++j) input_extreme_block[j] = -mask_;
236 fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, &txfm_param_);
237 ASM_REGISTER_STATE_CHECK(
238 RunFwdTxfm(input_extreme_block, output_block, pitch_));
240 int row_length = FindRowLength();
241 // The minimum quant value is 4.
242 for (int j = 0; j < num_coeffs_; ++j) {
243 ASSERT_EQ(output_block[j], output_ref_block[j])
244 << "Not bit-exact at test index: " << i << ", "
245 << "j = " << j << std::endl;
246 EXPECT_GE(row_length * kDctMaxValue << (bit_depth_ - 8),
247 abs(output_block[j]))
248 << "Error: NxN FDCT has coefficient larger than N*DCT_MAX_VALUE";
251 aom_free(input_extreme_block);
252 aom_free(output_ref_block);
253 aom_free(output_block);
256 void RunInvAccuracyCheck(int limit) {
257 ACMRandom rnd(ACMRandom::DeterministicSeed());
258 const int count_test_block = 1000;
260 int16_t *in = reinterpret_cast<int16_t *>(
261 aom_memalign(16, sizeof(int16_t) * num_coeffs_));
262 tran_low_t *coeff = reinterpret_cast<tran_low_t *>(
263 aom_memalign(16, sizeof(tran_low_t) * num_coeffs_));
264 uint8_t *dst = reinterpret_cast<uint8_t *>(
265 aom_memalign(16, sizeof(uint8_t) * num_coeffs_));
266 uint8_t *src = reinterpret_cast<uint8_t *>(
267 aom_memalign(16, sizeof(uint8_t) * num_coeffs_));
269 uint16_t *dst16 = reinterpret_cast<uint16_t *>(
270 aom_memalign(16, sizeof(uint16_t) * num_coeffs_));
271 uint16_t *src16 = reinterpret_cast<uint16_t *>(
272 aom_memalign(16, sizeof(uint16_t) * num_coeffs_));
274 for (int i = 0; i < count_test_block; ++i) {
275 // Initialize a test block with input range [-mask_, mask_].
276 for (int j = 0; j < num_coeffs_; ++j) {
277 if (bit_depth_ == AOM_BITS_8) {
278 src[j] = rnd.Rand8();
279 dst[j] = rnd.Rand8();
280 in[j] = src[j] - dst[j];
281 } else {
282 src16[j] = rnd.Rand16() & mask_;
283 dst16[j] = rnd.Rand16() & mask_;
284 in[j] = src16[j] - dst16[j];
288 fwd_txfm_ref(in, coeff, pitch_, &txfm_param_);
290 if (bit_depth_ == AOM_BITS_8) {
291 ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
292 } else {
293 ASM_REGISTER_STATE_CHECK(
294 RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
297 for (int j = 0; j < num_coeffs_; ++j) {
298 const int diff =
299 bit_depth_ == AOM_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
300 const uint32_t error = diff * diff;
301 ASSERT_GE(static_cast<uint32_t>(limit), error)
302 << "Error: 4x4 IDCT has error " << error << " at index " << j;
305 aom_free(in);
306 aom_free(coeff);
307 aom_free(dst);
308 aom_free(src);
309 aom_free(src16);
310 aom_free(dst16);
313 int pitch_;
314 int height_;
315 FhtFunc fwd_txfm_ref;
316 IhtFunc inv_txfm_ref;
317 aom_bit_depth_t bit_depth_;
318 int mask_;
319 int num_coeffs_;
320 TxfmParam txfm_param_;
322 private:
323 // Assume transform size is 4x4, 8x8, 16x16,...
324 int FindRowLength() const {
325 int row = 4;
326 if (16 == num_coeffs_) {
327 row = 4;
328 } else if (64 == num_coeffs_) {
329 row = 8;
330 } else if (256 == num_coeffs_) {
331 row = 16;
332 } else if (1024 == num_coeffs_) {
333 row = 32;
335 return row;
339 } // namespace libaom_test
341 #endif // TEST_TRANSFORM_TEST_BASE_H_