av1_convolve_ x,y _avx2() -- use 256 bit load/store
[aom.git] / test / av1_txfm_test.cc
blobdc11009e01fa3424b0e62300cbf0140b49c467b1
1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
12 #include <stdio.h>
13 #include "test/av1_txfm_test.h"
15 namespace libaom_test {
17 int get_txfm1d_size(TX_SIZE tx_size) { return tx_size_wide[tx_size]; }
19 void get_txfm1d_type(TX_TYPE txfm2d_type, TYPE_TXFM *type0, TYPE_TXFM *type1) {
20 switch (txfm2d_type) {
21 case DCT_DCT:
22 *type0 = TYPE_DCT;
23 *type1 = TYPE_DCT;
24 break;
25 case ADST_DCT:
26 *type0 = TYPE_ADST;
27 *type1 = TYPE_DCT;
28 break;
29 case DCT_ADST:
30 *type0 = TYPE_DCT;
31 *type1 = TYPE_ADST;
32 break;
33 case ADST_ADST:
34 *type0 = TYPE_ADST;
35 *type1 = TYPE_ADST;
36 break;
37 case FLIPADST_DCT:
38 *type0 = TYPE_ADST;
39 *type1 = TYPE_DCT;
40 break;
41 case DCT_FLIPADST:
42 *type0 = TYPE_DCT;
43 *type1 = TYPE_ADST;
44 break;
45 case FLIPADST_FLIPADST:
46 *type0 = TYPE_ADST;
47 *type1 = TYPE_ADST;
48 break;
49 case ADST_FLIPADST:
50 *type0 = TYPE_ADST;
51 *type1 = TYPE_ADST;
52 break;
53 case FLIPADST_ADST:
54 *type0 = TYPE_ADST;
55 *type1 = TYPE_ADST;
56 break;
57 default:
58 *type0 = TYPE_DCT;
59 *type1 = TYPE_DCT;
60 assert(0);
61 break;
// 1 / sqrt(2): the extra weight applied to the k == 0 (DC) term by the
// reference DCT / inverse DCT below.
double invSqrt2 = 1.0 / pow(2.0, 0.5);
// Returns the DCT basis value cos(pi * (2n + 1) * k / (2 * size)) for sample
// position |n| and frequency index |k| of a |size|-point transform.
double dct_matrix(double n, double k, int size) {
  const double angle = M_PI * (2 * n + 1) * k / (2 * size);
  return cos(angle);
}
71 void reference_dct_1d(const double *in, double *out, int size) {
72 for (int k = 0; k < size; ++k) {
73 out[k] = 0;
74 for (int n = 0; n < size; ++n) {
75 out[k] += in[n] * dct_matrix(n, k, size);
77 if (k == 0) out[k] = out[k] * invSqrt2;
81 void reference_idct_1d(const double *in, double *out, int size) {
82 for (int k = 0; k < size; ++k) {
83 out[k] = 0;
84 for (int n = 0; n < size; ++n) {
85 if (n == 0)
86 out[k] += invSqrt2 * in[n] * dct_matrix(k, n, size);
87 else
88 out[k] += in[n] * dct_matrix(k, n, size);
93 // TODO(any): Copied from dct.c. Should be replaced by a proper reference
94 // function that takes 'double' input & output.
95 static void fadst4(const tran_low_t *input, tran_low_t *output) {
96 tran_high_t x0, x1, x2, x3;
97 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
99 x0 = input[0];
100 x1 = input[1];
101 x2 = input[2];
102 x3 = input[3];
104 if (!(x0 | x1 | x2 | x3)) {
105 output[0] = output[1] = output[2] = output[3] = 0;
106 return;
109 s0 = sinpi_1_9 * x0;
110 s1 = sinpi_4_9 * x0;
111 s2 = sinpi_2_9 * x1;
112 s3 = sinpi_1_9 * x1;
113 s4 = sinpi_3_9 * x2;
114 s5 = sinpi_4_9 * x3;
115 s6 = sinpi_2_9 * x3;
116 s7 = x0 + x1 - x3;
118 x0 = s0 + s2 + s5;
119 x1 = sinpi_3_9 * s7;
120 x2 = s1 - s3 + s6;
121 x3 = s4;
123 s0 = x0 + x3;
124 s1 = x1;
125 s2 = x2 - x3;
126 s3 = x2 - x0 + x3;
128 // 1-D transform scaling factor is sqrt(2).
129 output[0] = (tran_low_t)fdct_round_shift(s0);
130 output[1] = (tran_low_t)fdct_round_shift(s1);
131 output[2] = (tran_low_t)fdct_round_shift(s2);
132 output[3] = (tran_low_t)fdct_round_shift(s3);
135 void reference_adst_1d(const double *in, double *out, int size) {
136 if (size == 4) { // Special case.
137 tran_low_t int_input[4];
138 for (int i = 0; i < 4; ++i) {
139 int_input[i] = static_cast<tran_low_t>(round(in[i]));
141 tran_low_t int_output[4];
142 fadst4(int_input, int_output);
143 for (int i = 0; i < 4; ++i) {
144 out[i] = int_output[i];
146 return;
149 for (int k = 0; k < size; ++k) {
150 out[k] = 0;
151 for (int n = 0; n < size; ++n) {
152 out[k] += in[n] * sin(M_PI * (2 * n + 1) * (2 * k + 1) / (4 * size));
157 void reference_hybrid_1d(double *in, double *out, int size, int type) {
158 if (type == TYPE_DCT)
159 reference_dct_1d(in, out, size);
160 else
161 reference_adst_1d(in, out, size);
164 double get_amplification_factor(TX_TYPE tx_type, TX_SIZE tx_size) {
165 TXFM_2D_FLIP_CFG fwd_txfm_flip_cfg;
166 av1_get_fwd_txfm_cfg(tx_type, tx_size, &fwd_txfm_flip_cfg);
167 const int tx_width = fwd_txfm_flip_cfg.row_cfg->txfm_size;
168 const int tx_height = fwd_txfm_flip_cfg.col_cfg->txfm_size;
169 const int8_t *shift = (tx_width > tx_height)
170 ? fwd_txfm_flip_cfg.row_cfg->shift
171 : fwd_txfm_flip_cfg.col_cfg->shift;
172 const int amplify_bit = shift[0] + shift[1] + shift[2];
173 double amplify_factor =
174 amplify_bit >= 0 ? (1 << amplify_bit) : (1.0 / (1 << -amplify_bit));
176 // For rectangular transforms, we need to multiply by an extra factor.
177 const int rect_type = get_rect_tx_log_ratio(tx_width, tx_height);
178 if (abs(rect_type) == 1) {
179 amplify_factor *= pow(2, 0.5);
180 } else if (abs(rect_type) == 2) {
181 const int tx_max_dim = AOMMAX(tx_width, tx_height);
182 const int rect_type2_shift = (tx_max_dim >= 32) ? 2 : 1;
183 amplify_factor *= pow(2, rect_type2_shift);
185 return amplify_factor;
188 void reference_hybrid_2d(double *in, double *out, TX_TYPE tx_type,
189 TX_SIZE tx_size) {
190 // Get transform type and size of each dimension.
191 TYPE_TXFM type0;
192 TYPE_TXFM type1;
193 get_txfm1d_type(tx_type, &type0, &type1);
194 const int tx_width = tx_size_wide[tx_size];
195 const int tx_height = tx_size_high[tx_size];
197 double *const temp_in = new double[AOMMAX(tx_width, tx_height)];
198 double *const temp_out = new double[AOMMAX(tx_width, tx_height)];
199 double *const out_interm = new double[tx_width * tx_height];
200 const int stride = tx_width;
202 // Transform columns.
203 for (int c = 0; c < tx_width; ++c) {
204 for (int r = 0; r < tx_height; ++r) {
205 temp_in[r] = in[r * stride + c];
207 reference_hybrid_1d(temp_in, temp_out, tx_height, type0);
208 for (int r = 0; r < tx_height; ++r) {
209 out_interm[r * stride + c] = temp_out[r];
213 // Transform rows.
214 for (int r = 0; r < tx_height; ++r) {
215 reference_hybrid_1d(out_interm + r * stride, out + r * stride, tx_width,
216 type1);
219 delete[] temp_in;
220 delete[] temp_out;
221 delete[] out_interm;
223 #if CONFIG_TX64X64
224 // These transforms use an approximate 2D DCT transform, by only keeping the
225 // top-left quarter of the coefficients, and repacking them in the first
226 // quarter indices.
227 // TODO(urvang): Refactor this code.
228 if (tx_width == 64 && tx_height == 64) { // tx_size == TX_64X64
229 // Zero out top-right 32x32 area.
230 for (int row = 0; row < 32; ++row) {
231 memset(out + row * 64 + 32, 0, 32 * sizeof(*out));
233 // Zero out the bottom 64x32 area.
234 memset(out + 32 * 64, 0, 32 * 64 * sizeof(*out));
235 // Re-pack non-zero coeffs in the first 32x32 indices.
236 for (int row = 1; row < 32; ++row) {
237 memcpy(out + row * 32, out + row * 64, 32 * sizeof(*out));
239 } else if (tx_width == 32 && tx_height == 64) { // tx_size == TX_32X64
240 // Zero out the bottom 32x32 area.
241 memset(out + 32 * 32, 0, 32 * 32 * sizeof(*out));
242 // Note: no repacking needed here.
243 } else if (tx_width == 64 && tx_height == 32) { // tx_size == TX_64X32
244 // Zero out right 32x32 area.
245 for (int row = 0; row < 32; ++row) {
246 memset(out + row * 64 + 32, 0, 32 * sizeof(*out));
248 // Re-pack non-zero coeffs in the first 32x32 indices.
249 for (int row = 1; row < 32; ++row) {
250 memcpy(out + row * 32, out + row * 64, 32 * sizeof(*out));
252 } else if (tx_width == 16 && tx_height == 64) { // tx_size == TX_16X64
253 // Zero out the bottom 16x32 area.
254 memset(out + 16 * 32, 0, 16 * 32 * sizeof(*out));
255 // Note: no repacking needed here.
256 } else if (tx_width == 64 && tx_height == 16) { // tx_size == TX_64X16
257 // Zero out right 32x16 area.
258 for (int row = 0; row < 16; ++row) {
259 memset(out + row * 64 + 32, 0, 32 * sizeof(*out));
261 // Re-pack non-zero coeffs in the first 32x16 indices.
262 for (int row = 1; row < 16; ++row) {
263 memcpy(out + row * 32, out + row * 64, 32 * sizeof(*out));
266 #endif // CONFIG_TX_64X64
268 // Apply appropriate scale.
269 const double amplify_factor = get_amplification_factor(tx_type, tx_size);
270 for (int c = 0; c < tx_width; ++c) {
271 for (int r = 0; r < tx_height; ++r) {
272 out[r * stride + c] *= amplify_factor;
// Mirrors a |width| x |height| block left-to-right, in place.
// |dest| is row-major with |stride| elements between the starts of
// consecutive rows.
template <typename Type>
void fliplr(Type *dest, int width, int height, int stride) {
  for (int row = 0; row < height; ++row) {
    Type *const line = dest + row * stride;
    // Walk inwards from both ends, exchanging pairs until the indices meet.
    for (int lo = 0, hi = width - 1; lo < hi; ++lo, --hi) {
      const Type saved = line[lo];
      line[lo] = line[hi];
      line[hi] = saved;
    }
  }
}
// Mirrors a |width| x |height| block top-to-bottom, in place.
// |dest| is row-major with |stride| elements between the starts of
// consecutive rows.
template <typename Type>
void flipud(Type *dest, int width, int height, int stride) {
  // Exchange whole rows pairwise, moving inwards from the top and the bottom.
  for (int top = 0, bottom = height - 1; top < bottom; ++top, --bottom) {
    Type *const upper = dest + top * stride;
    Type *const lower = dest + bottom * stride;
    for (int col = 0; col < width; ++col) {
      const Type saved = upper[col];
      upper[col] = lower[col];
      lower[col] = saved;
    }
  }
}
// Mirrors a |width| x |height| block both left-to-right and top-to-bottom
// (a 180-degree rotation), in place.
// |dest| is row-major with |stride| elements between the starts of
// consecutive rows.
template <typename Type>
void fliplrud(Type *dest, int width, int height, int stride) {
  // Exchange every element of the upper half with its point-mirrored partner
  // in the lower half.
  for (int r = 0; r < height / 2; ++r) {
    for (int c = 0; c < width; ++c) {
      const Type tmp = dest[r * stride + c];
      dest[r * stride + c] = dest[(height - 1 - r) * stride + width - 1 - c];
      dest[(height - 1 - r) * stride + width - 1 - c] = tmp;
    }
  }

  // With an odd number of rows the middle row is its own vertical mirror and
  // is skipped by the loop above, so it still has to be reversed
  // left-to-right to complete the rotation.
  if (height % 2 == 1) {
    Type *const middle = dest + (height / 2) * stride;
    for (int lo = 0, hi = width - 1; lo < hi; ++lo, --hi) {
      const Type tmp = middle[lo];
      middle[lo] = middle[hi];
      middle[hi] = tmp;
    }
  }
}
310 template void fliplr<double>(double *dest, int width, int height, int stride);
311 template void flipud<double>(double *dest, int width, int height, int stride);
312 template void fliplrud<double>(double *dest, int width, int height, int stride);
314 int bd_arr[BD_NUM] = { 8, 10, 12 };
316 #if CONFIG_TX64X64
317 int8_t low_range_arr[BD_NUM] = { 18, 32, 32 };
318 #else
319 int8_t low_range_arr[BD_NUM] = { 16, 32, 32 };
320 #endif // CONFIG_TX64X64
321 int8_t high_range_arr[BD_NUM] = { 32, 32, 32 };
323 void txfm_stage_range_check(const int8_t *stage_range, int stage_num,
324 const int8_t *cos_bit, int low_range,
325 int high_range) {
326 for (int i = 0; i < stage_num; ++i) {
327 EXPECT_LE(stage_range[i], low_range);
329 for (int i = 0; i < stage_num - 1; ++i) {
330 // make sure there is no overflow while doing half_btf()
331 EXPECT_LE(stage_range[i] + cos_bit[i], high_range);
332 EXPECT_LE(stage_range[i + 1] + cos_bit[i], high_range);
333 if (stage_range[i] + cos_bit[i] > high_range) {
334 std::cout << i;
335 assert(0);
337 if (stage_range[i + 1] + cos_bit[i] > high_range) {
338 std::cout << i;
339 assert(0);
343 } // namespace libaom_test