/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
15 #include "third_party/googletest/src/include/gtest/gtest.h"
16 #include "test/acm_random.h"
17 #include "test/clear_system_state.h"
18 #include "test/register_state_check.h"
19 #include "test/util.h"
21 #include "./vp9_rtcd.h"
22 #include "vp9/common/vp9_entropy.h"
23 #include "vp9/common/vp9_scan.h"
24 #include "vpx/vpx_codec.h"
25 #include "vpx/vpx_integer.h"
26 #include "vpx_ports/mem.h"
28 using libvpx_test::ACMRandom
;
// Rounds a double to the nearest integer, with halfway cases rounded away
// from zero (round(2.5) == 3, round(-2.5) == -3).
// The visible text had both returns unconditional (the second unreachable);
// the negative-input branch is restored here.  static_cast<int> truncates
// toward zero, which equals ceil(x - 0.5) when x < 0 and floor(x + 0.5)
// when x >= 0, so no <cmath> call is needed.
static int round(double x) {
  if (x < 0)
    return static_cast<int>(x - 0.5);
  return static_cast<int>(x + 0.5);
}
// Total number of coefficients in a 16x16 transform block.
const int kNumCoeffs = 256;
// Pi, at the precision used by the double-precision reference transforms.
const double PI = 3.1415926535898;
// reference2_16x16_idct_2d(): double-precision reference inverse 16x16 DCT,
// evaluated directly from the transform definition as per-pixel cosine sums.
// NOTE(review): this extraction appears to have dropped lines from the body —
// the declaration of the accumulator 'x', the scaling of the i==0 / j==0
// basis terms, the accumulation into 'output', and the closing braces are
// not visible here.  Verify against the original file before relying on
// this text.
43 void reference2_16x16_idct_2d(double *input
, double *output
) {
// Outer loops walk every output position (l, k)...
45 for (int l
= 0; l
< 16; ++l
) {
46 for (int k
= 0; k
< 16; ++k
) {
// ...inner loops accumulate the contribution of every coefficient (i, j).
48 for (int i
= 0; i
< 16; ++i
) {
49 for (int j
= 0; j
< 16; ++j
) {
50 x
= cos(PI
* j
* (l
+ 0.5) / 16.0) *
51 cos(PI
* i
* (k
+ 0.5) / 16.0) *
52 input
[i
* 16 + j
] / 256;
// Cosine constants used by butterfly_16x16_dct_1d() below:
// Cn = cos(n * PI / 32), so the values decrease monotonically from
// C1 ~= 0.995 down to C15 ~= 0.098, with C8 = cos(PI/4) = 1/sqrt(2).
const double C1 = 0.995184726672197;
const double C2 = 0.98078528040323;
const double C3 = 0.956940335732209;
const double C4 = 0.923879532511287;
const double C5 = 0.881921264348355;
const double C6 = 0.831469612302545;
const double C7 = 0.773010453362737;
const double C8 = 0.707106781186548;
const double C9 = 0.634393284163646;
const double C10 = 0.555570233019602;
const double C11 = 0.471396736825998;
const double C12 = 0.38268343236509;
const double C13 = 0.290284677254462;
const double C14 = 0.195090322016128;
const double C15 = 0.098017140329561;
82 void butterfly_16x16_dct_1d(double input
[16], double output
[16]) {
84 double intermediate
[16];
88 step
[ 0] = input
[0] + input
[15];
89 step
[ 1] = input
[1] + input
[14];
90 step
[ 2] = input
[2] + input
[13];
91 step
[ 3] = input
[3] + input
[12];
92 step
[ 4] = input
[4] + input
[11];
93 step
[ 5] = input
[5] + input
[10];
94 step
[ 6] = input
[6] + input
[ 9];
95 step
[ 7] = input
[7] + input
[ 8];
96 step
[ 8] = input
[7] - input
[ 8];
97 step
[ 9] = input
[6] - input
[ 9];
98 step
[10] = input
[5] - input
[10];
99 step
[11] = input
[4] - input
[11];
100 step
[12] = input
[3] - input
[12];
101 step
[13] = input
[2] - input
[13];
102 step
[14] = input
[1] - input
[14];
103 step
[15] = input
[0] - input
[15];
106 output
[0] = step
[0] + step
[7];
107 output
[1] = step
[1] + step
[6];
108 output
[2] = step
[2] + step
[5];
109 output
[3] = step
[3] + step
[4];
110 output
[4] = step
[3] - step
[4];
111 output
[5] = step
[2] - step
[5];
112 output
[6] = step
[1] - step
[6];
113 output
[7] = step
[0] - step
[7];
115 temp1
= step
[ 8] * C7
;
116 temp2
= step
[15] * C9
;
117 output
[ 8] = temp1
+ temp2
;
119 temp1
= step
[ 9] * C11
;
120 temp2
= step
[14] * C5
;
121 output
[ 9] = temp1
- temp2
;
123 temp1
= step
[10] * C3
;
124 temp2
= step
[13] * C13
;
125 output
[10] = temp1
+ temp2
;
127 temp1
= step
[11] * C15
;
128 temp2
= step
[12] * C1
;
129 output
[11] = temp1
- temp2
;
131 temp1
= step
[11] * C1
;
132 temp2
= step
[12] * C15
;
133 output
[12] = temp2
+ temp1
;
135 temp1
= step
[10] * C13
;
136 temp2
= step
[13] * C3
;
137 output
[13] = temp2
- temp1
;
139 temp1
= step
[ 9] * C5
;
140 temp2
= step
[14] * C11
;
141 output
[14] = temp2
+ temp1
;
143 temp1
= step
[ 8] * C9
;
144 temp2
= step
[15] * C7
;
145 output
[15] = temp2
- temp1
;
148 step
[ 0] = output
[0] + output
[3];
149 step
[ 1] = output
[1] + output
[2];
150 step
[ 2] = output
[1] - output
[2];
151 step
[ 3] = output
[0] - output
[3];
153 temp1
= output
[4] * C14
;
154 temp2
= output
[7] * C2
;
155 step
[ 4] = temp1
+ temp2
;
157 temp1
= output
[5] * C10
;
158 temp2
= output
[6] * C6
;
159 step
[ 5] = temp1
+ temp2
;
161 temp1
= output
[5] * C6
;
162 temp2
= output
[6] * C10
;
163 step
[ 6] = temp2
- temp1
;
165 temp1
= output
[4] * C2
;
166 temp2
= output
[7] * C14
;
167 step
[ 7] = temp2
- temp1
;
169 step
[ 8] = output
[ 8] + output
[11];
170 step
[ 9] = output
[ 9] + output
[10];
171 step
[10] = output
[ 9] - output
[10];
172 step
[11] = output
[ 8] - output
[11];
174 step
[12] = output
[12] + output
[15];
175 step
[13] = output
[13] + output
[14];
176 step
[14] = output
[13] - output
[14];
177 step
[15] = output
[12] - output
[15];
180 output
[ 0] = (step
[ 0] + step
[ 1]);
181 output
[ 8] = (step
[ 0] - step
[ 1]);
183 temp1
= step
[2] * C12
;
184 temp2
= step
[3] * C4
;
185 temp1
= temp1
+ temp2
;
186 output
[ 4] = 2*(temp1
* C8
);
188 temp1
= step
[2] * C4
;
189 temp2
= step
[3] * C12
;
190 temp1
= temp2
- temp1
;
191 output
[12] = 2 * (temp1
* C8
);
193 output
[ 2] = 2 * ((step
[4] + step
[ 5]) * C8
);
194 output
[14] = 2 * ((step
[7] - step
[ 6]) * C8
);
196 temp1
= step
[4] - step
[5];
197 temp2
= step
[6] + step
[7];
198 output
[ 6] = (temp1
+ temp2
);
199 output
[10] = (temp1
- temp2
);
201 intermediate
[8] = step
[8] + step
[14];
202 intermediate
[9] = step
[9] + step
[15];
204 temp1
= intermediate
[8] * C12
;
205 temp2
= intermediate
[9] * C4
;
206 temp1
= temp1
- temp2
;
207 output
[3] = 2 * (temp1
* C8
);
209 temp1
= intermediate
[8] * C4
;
210 temp2
= intermediate
[9] * C12
;
211 temp1
= temp2
+ temp1
;
212 output
[13] = 2 * (temp1
* C8
);
214 output
[ 9] = 2 * ((step
[10] + step
[11]) * C8
);
216 intermediate
[11] = step
[10] - step
[11];
217 intermediate
[12] = step
[12] + step
[13];
218 intermediate
[13] = step
[12] - step
[13];
219 intermediate
[14] = step
[ 8] - step
[14];
220 intermediate
[15] = step
[ 9] - step
[15];
222 output
[15] = (intermediate
[11] + intermediate
[12]);
223 output
[ 1] = -(intermediate
[11] - intermediate
[12]);
225 output
[ 7] = 2 * (intermediate
[13] * C8
);
227 temp1
= intermediate
[14] * C12
;
228 temp2
= intermediate
[15] * C4
;
229 temp1
= temp1
- temp2
;
230 output
[11] = -2 * (temp1
* C8
);
232 temp1
= intermediate
[14] * C4
;
233 temp2
= intermediate
[15] * C12
;
234 temp1
= temp2
+ temp1
;
235 output
[ 5] = 2 * (temp1
* C8
);
238 void reference_16x16_dct_2d(int16_t input
[256], double output
[256]) {
239 // First transform columns
240 for (int i
= 0; i
< 16; ++i
) {
241 double temp_in
[16], temp_out
[16];
242 for (int j
= 0; j
< 16; ++j
)
243 temp_in
[j
] = input
[j
* 16 + i
];
244 butterfly_16x16_dct_1d(temp_in
, temp_out
);
245 for (int j
= 0; j
< 16; ++j
)
246 output
[j
* 16 + i
] = temp_out
[j
];
248 // Then transform rows
249 for (int i
= 0; i
< 16; ++i
) {
250 double temp_in
[16], temp_out
[16];
251 for (int j
= 0; j
< 16; ++j
)
252 temp_in
[j
] = output
[j
+ i
* 16];
253 butterfly_16x16_dct_1d(temp_in
, temp_out
);
254 // Scale by some magic number
255 for (int j
= 0; j
< 16; ++j
)
256 output
[j
+ i
* 16] = temp_out
[j
]/2;
260 typedef void (*FdctFunc
)(const int16_t *in
, tran_low_t
*out
, int stride
);
261 typedef void (*IdctFunc
)(const tran_low_t
*in
, uint8_t *out
, int stride
);
262 typedef void (*FhtFunc
)(const int16_t *in
, tran_low_t
*out
, int stride
,
264 typedef void (*IhtFunc
)(const tran_low_t
*in
, uint8_t *out
, int stride
,
267 typedef std::tr1::tuple
<FdctFunc
, IdctFunc
, int, vpx_bit_depth_t
> Dct16x16Param
;
268 typedef std::tr1::tuple
<FhtFunc
, IhtFunc
, int, vpx_bit_depth_t
> Ht16x16Param
;
269 typedef std::tr1::tuple
<IdctFunc
, IdctFunc
, int, vpx_bit_depth_t
>
272 void fdct16x16_ref(const int16_t *in
, tran_low_t
*out
, int stride
,
274 vp9_fdct16x16_c(in
, out
, stride
);
277 void idct16x16_ref(const tran_low_t
*in
, uint8_t *dest
, int stride
,
279 vp9_idct16x16_256_add_c(in
, dest
, stride
);
282 void fht16x16_ref(const int16_t *in
, tran_low_t
*out
, int stride
,
284 vp9_fht16x16_c(in
, out
, stride
, tx_type
);
287 void iht16x16_ref(const tran_low_t
*in
, uint8_t *dest
, int stride
,
289 vp9_iht16x16_256_add_c(in
, dest
, stride
, tx_type
);
292 #if CONFIG_VP9_HIGHBITDEPTH
293 void idct16x16_10(const tran_low_t
*in
, uint8_t *out
, int stride
) {
294 vp9_highbd_idct16x16_256_add_c(in
, out
, stride
, 10);
297 void idct16x16_12(const tran_low_t
*in
, uint8_t *out
, int stride
) {
298 vp9_highbd_idct16x16_256_add_c(in
, out
, stride
, 12);
301 void idct16x16_10_ref(const tran_low_t
*in
, uint8_t *out
, int stride
,
303 idct16x16_10(in
, out
, stride
);
306 void idct16x16_12_ref(const tran_low_t
*in
, uint8_t *out
, int stride
,
308 idct16x16_12(in
, out
, stride
);
311 void iht16x16_10(const tran_low_t
*in
, uint8_t *out
, int stride
, int tx_type
) {
312 vp9_highbd_iht16x16_256_add_c(in
, out
, stride
, tx_type
, 10);
315 void iht16x16_12(const tran_low_t
*in
, uint8_t *out
, int stride
, int tx_type
) {
316 vp9_highbd_iht16x16_256_add_c(in
, out
, stride
, tx_type
, 12);
319 void idct16x16_10_add_10_c(const tran_low_t
*in
, uint8_t *out
, int stride
) {
320 vp9_highbd_idct16x16_10_add_c(in
, out
, stride
, 10);
323 void idct16x16_10_add_12_c(const tran_low_t
*in
, uint8_t *out
, int stride
) {
324 vp9_highbd_idct16x16_10_add_c(in
, out
, stride
, 12);
328 void idct16x16_256_add_10_sse2(const tran_low_t
*in
, uint8_t *out
, int stride
) {
329 vp9_highbd_idct16x16_256_add_sse2(in
, out
, stride
, 10);
332 void idct16x16_256_add_12_sse2(const tran_low_t
*in
, uint8_t *out
, int stride
) {
333 vp9_highbd_idct16x16_256_add_sse2(in
, out
, stride
, 12);
336 void idct16x16_10_add_10_sse2(const tran_low_t
*in
, uint8_t *out
, int stride
) {
337 vp9_highbd_idct16x16_10_add_sse2(in
, out
, stride
, 10);
340 void idct16x16_10_add_12_sse2(const tran_low_t
*in
, uint8_t *out
, int stride
) {
341 vp9_highbd_idct16x16_10_add_sse2(in
, out
, stride
, 12);
344 #endif // CONFIG_VP9_HIGHBITDEPTH
346 class Trans16x16TestBase
{
348 virtual ~Trans16x16TestBase() {}
351 virtual void RunFwdTxfm(int16_t *in
, tran_low_t
*out
, int stride
) = 0;
353 virtual void RunInvTxfm(tran_low_t
*out
, uint8_t *dst
, int stride
) = 0;
355 void RunAccuracyCheck() {
356 ACMRandom
rnd(ACMRandom::DeterministicSeed());
357 uint32_t max_error
= 0;
358 int64_t total_error
= 0;
359 const int count_test_block
= 10000;
360 for (int i
= 0; i
< count_test_block
; ++i
) {
361 DECLARE_ALIGNED(16, int16_t, test_input_block
[kNumCoeffs
]);
362 DECLARE_ALIGNED(16, tran_low_t
, test_temp_block
[kNumCoeffs
]);
363 DECLARE_ALIGNED(16, uint8_t, dst
[kNumCoeffs
]);
364 DECLARE_ALIGNED(16, uint8_t, src
[kNumCoeffs
]);
365 #if CONFIG_VP9_HIGHBITDEPTH
366 DECLARE_ALIGNED(16, uint16_t, dst16
[kNumCoeffs
]);
367 DECLARE_ALIGNED(16, uint16_t, src16
[kNumCoeffs
]);
370 // Initialize a test block with input range [-mask_, mask_].
371 for (int j
= 0; j
< kNumCoeffs
; ++j
) {
372 if (bit_depth_
== VPX_BITS_8
) {
373 src
[j
] = rnd
.Rand8();
374 dst
[j
] = rnd
.Rand8();
375 test_input_block
[j
] = src
[j
] - dst
[j
];
376 #if CONFIG_VP9_HIGHBITDEPTH
378 src16
[j
] = rnd
.Rand16() & mask_
;
379 dst16
[j
] = rnd
.Rand16() & mask_
;
380 test_input_block
[j
] = src16
[j
] - dst16
[j
];
385 ASM_REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block
,
386 test_temp_block
, pitch_
));
387 if (bit_depth_
== VPX_BITS_8
) {
388 ASM_REGISTER_STATE_CHECK(
389 RunInvTxfm(test_temp_block
, dst
, pitch_
));
390 #if CONFIG_VP9_HIGHBITDEPTH
392 ASM_REGISTER_STATE_CHECK(
393 RunInvTxfm(test_temp_block
, CONVERT_TO_BYTEPTR(dst16
), pitch_
));
397 for (int j
= 0; j
< kNumCoeffs
; ++j
) {
398 #if CONFIG_VP9_HIGHBITDEPTH
399 const uint32_t diff
=
400 bit_depth_
== VPX_BITS_8
? dst
[j
] - src
[j
] : dst16
[j
] - src16
[j
];
402 const uint32_t diff
= dst
[j
] - src
[j
];
404 const uint32_t error
= diff
* diff
;
405 if (max_error
< error
)
407 total_error
+= error
;
411 EXPECT_GE(1u << 2 * (bit_depth_
- 8), max_error
)
412 << "Error: 16x16 FHT/IHT has an individual round trip error > 1";
414 EXPECT_GE(count_test_block
<< 2 * (bit_depth_
- 8), total_error
)
415 << "Error: 16x16 FHT/IHT has average round trip error > 1 per block";
418 void RunCoeffCheck() {
419 ACMRandom
rnd(ACMRandom::DeterministicSeed());
420 const int count_test_block
= 1000;
421 DECLARE_ALIGNED(16, int16_t, input_block
[kNumCoeffs
]);
422 DECLARE_ALIGNED(16, tran_low_t
, output_ref_block
[kNumCoeffs
]);
423 DECLARE_ALIGNED(16, tran_low_t
, output_block
[kNumCoeffs
]);
425 for (int i
= 0; i
< count_test_block
; ++i
) {
426 // Initialize a test block with input range [-mask_, mask_].
427 for (int j
= 0; j
< kNumCoeffs
; ++j
)
428 input_block
[j
] = (rnd
.Rand16() & mask_
) - (rnd
.Rand16() & mask_
);
430 fwd_txfm_ref(input_block
, output_ref_block
, pitch_
, tx_type_
);
431 ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block
, output_block
, pitch_
));
433 // The minimum quant value is 4.
434 for (int j
= 0; j
< kNumCoeffs
; ++j
)
435 EXPECT_EQ(output_block
[j
], output_ref_block
[j
]);
440 ACMRandom
rnd(ACMRandom::DeterministicSeed());
441 const int count_test_block
= 1000;
442 DECLARE_ALIGNED(16, int16_t, input_extreme_block
[kNumCoeffs
]);
443 DECLARE_ALIGNED(16, tran_low_t
, output_ref_block
[kNumCoeffs
]);
444 DECLARE_ALIGNED(16, tran_low_t
, output_block
[kNumCoeffs
]);
446 for (int i
= 0; i
< count_test_block
; ++i
) {
447 // Initialize a test block with input range [-mask_, mask_].
448 for (int j
= 0; j
< kNumCoeffs
; ++j
) {
449 input_extreme_block
[j
] = rnd
.Rand8() % 2 ? mask_
: -mask_
;
452 for (int j
= 0; j
< kNumCoeffs
; ++j
)
453 input_extreme_block
[j
] = mask_
;
455 for (int j
= 0; j
< kNumCoeffs
; ++j
)
456 input_extreme_block
[j
] = -mask_
;
459 fwd_txfm_ref(input_extreme_block
, output_ref_block
, pitch_
, tx_type_
);
460 ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block
,
461 output_block
, pitch_
));
463 // The minimum quant value is 4.
464 for (int j
= 0; j
< kNumCoeffs
; ++j
) {
465 EXPECT_EQ(output_block
[j
], output_ref_block
[j
]);
466 EXPECT_GE(4 * DCT_MAX_VALUE
<< (bit_depth_
- 8), abs(output_block
[j
]))
467 << "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
472 void RunQuantCheck(int dc_thred
, int ac_thred
) {
473 ACMRandom
rnd(ACMRandom::DeterministicSeed());
474 const int count_test_block
= 100000;
475 DECLARE_ALIGNED(16, int16_t, input_extreme_block
[kNumCoeffs
]);
476 DECLARE_ALIGNED(16, tran_low_t
, output_ref_block
[kNumCoeffs
]);
478 DECLARE_ALIGNED(16, uint8_t, dst
[kNumCoeffs
]);
479 DECLARE_ALIGNED(16, uint8_t, ref
[kNumCoeffs
]);
480 #if CONFIG_VP9_HIGHBITDEPTH
481 DECLARE_ALIGNED(16, uint16_t, dst16
[kNumCoeffs
]);
482 DECLARE_ALIGNED(16, uint16_t, ref16
[kNumCoeffs
]);
485 for (int i
= 0; i
< count_test_block
; ++i
) {
486 // Initialize a test block with input range [-mask_, mask_].
487 for (int j
= 0; j
< kNumCoeffs
; ++j
) {
488 input_extreme_block
[j
] = rnd
.Rand8() % 2 ? mask_
: -mask_
;
491 for (int j
= 0; j
< kNumCoeffs
; ++j
)
492 input_extreme_block
[j
] = mask_
;
494 for (int j
= 0; j
< kNumCoeffs
; ++j
)
495 input_extreme_block
[j
] = -mask_
;
497 fwd_txfm_ref(input_extreme_block
, output_ref_block
, pitch_
, tx_type_
);
499 // clear reconstructed pixel buffers
500 memset(dst
, 0, kNumCoeffs
* sizeof(uint8_t));
501 memset(ref
, 0, kNumCoeffs
* sizeof(uint8_t));
502 #if CONFIG_VP9_HIGHBITDEPTH
503 memset(dst16
, 0, kNumCoeffs
* sizeof(uint16_t));
504 memset(ref16
, 0, kNumCoeffs
* sizeof(uint16_t));
507 // quantization with maximum allowed step sizes
508 output_ref_block
[0] = (output_ref_block
[0] / dc_thred
) * dc_thred
;
509 for (int j
= 1; j
< kNumCoeffs
; ++j
)
510 output_ref_block
[j
] = (output_ref_block
[j
] / ac_thred
) * ac_thred
;
511 if (bit_depth_
== VPX_BITS_8
) {
512 inv_txfm_ref(output_ref_block
, ref
, pitch_
, tx_type_
);
513 ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block
, dst
, pitch_
));
514 #if CONFIG_VP9_HIGHBITDEPTH
516 inv_txfm_ref(output_ref_block
, CONVERT_TO_BYTEPTR(ref16
), pitch_
,
518 ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block
,
519 CONVERT_TO_BYTEPTR(dst16
), pitch_
));
522 if (bit_depth_
== VPX_BITS_8
) {
523 for (int j
= 0; j
< kNumCoeffs
; ++j
)
524 EXPECT_EQ(ref
[j
], dst
[j
]);
525 #if CONFIG_VP9_HIGHBITDEPTH
527 for (int j
= 0; j
< kNumCoeffs
; ++j
)
528 EXPECT_EQ(ref16
[j
], dst16
[j
]);
534 void RunInvAccuracyCheck() {
535 ACMRandom
rnd(ACMRandom::DeterministicSeed());
536 const int count_test_block
= 1000;
537 DECLARE_ALIGNED(16, int16_t, in
[kNumCoeffs
]);
538 DECLARE_ALIGNED(16, tran_low_t
, coeff
[kNumCoeffs
]);
539 DECLARE_ALIGNED(16, uint8_t, dst
[kNumCoeffs
]);
540 DECLARE_ALIGNED(16, uint8_t, src
[kNumCoeffs
]);
541 #if CONFIG_VP9_HIGHBITDEPTH
542 DECLARE_ALIGNED(16, uint16_t, dst16
[kNumCoeffs
]);
543 DECLARE_ALIGNED(16, uint16_t, src16
[kNumCoeffs
]);
544 #endif // CONFIG_VP9_HIGHBITDEPTH
546 for (int i
= 0; i
< count_test_block
; ++i
) {
547 double out_r
[kNumCoeffs
];
549 // Initialize a test block with input range [-255, 255].
550 for (int j
= 0; j
< kNumCoeffs
; ++j
) {
551 if (bit_depth_
== VPX_BITS_8
) {
552 src
[j
] = rnd
.Rand8();
553 dst
[j
] = rnd
.Rand8();
554 in
[j
] = src
[j
] - dst
[j
];
555 #if CONFIG_VP9_HIGHBITDEPTH
557 src16
[j
] = rnd
.Rand16() & mask_
;
558 dst16
[j
] = rnd
.Rand16() & mask_
;
559 in
[j
] = src16
[j
] - dst16
[j
];
560 #endif // CONFIG_VP9_HIGHBITDEPTH
564 reference_16x16_dct_2d(in
, out_r
);
565 for (int j
= 0; j
< kNumCoeffs
; ++j
)
566 coeff
[j
] = static_cast<tran_low_t
>(round(out_r
[j
]));
568 if (bit_depth_
== VPX_BITS_8
) {
569 ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff
, dst
, 16));
570 #if CONFIG_VP9_HIGHBITDEPTH
572 ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff
, CONVERT_TO_BYTEPTR(dst16
),
574 #endif // CONFIG_VP9_HIGHBITDEPTH
577 for (int j
= 0; j
< kNumCoeffs
; ++j
) {
578 #if CONFIG_VP9_HIGHBITDEPTH
579 const uint32_t diff
=
580 bit_depth_
== VPX_BITS_8
? dst
[j
] - src
[j
] : dst16
[j
] - src16
[j
];
582 const uint32_t diff
= dst
[j
] - src
[j
];
583 #endif // CONFIG_VP9_HIGHBITDEPTH
584 const uint32_t error
= diff
* diff
;
586 << "Error: 16x16 IDCT has error " << error
587 << " at index " << j
;
592 void CompareInvReference(IdctFunc ref_txfm
, int thresh
) {
593 ACMRandom
rnd(ACMRandom::DeterministicSeed());
594 const int count_test_block
= 10000;
596 const int16_t *scan
= vp9_default_scan_orders
[TX_16X16
].scan
;
597 DECLARE_ALIGNED(16, tran_low_t
, coeff
[kNumCoeffs
]);
598 DECLARE_ALIGNED(16, uint8_t, dst
[kNumCoeffs
]);
599 DECLARE_ALIGNED(16, uint8_t, ref
[kNumCoeffs
]);
600 #if CONFIG_VP9_HIGHBITDEPTH
601 DECLARE_ALIGNED(16, uint16_t, dst16
[kNumCoeffs
]);
602 DECLARE_ALIGNED(16, uint16_t, ref16
[kNumCoeffs
]);
603 #endif // CONFIG_VP9_HIGHBITDEPTH
605 for (int i
= 0; i
< count_test_block
; ++i
) {
606 for (int j
= 0; j
< kNumCoeffs
; ++j
) {
608 // Random values less than the threshold, either positive or negative
609 coeff
[scan
[j
]] = rnd(thresh
) * (1 - 2 * (i
% 2));
613 if (bit_depth_
== VPX_BITS_8
) {
616 #if CONFIG_VP9_HIGHBITDEPTH
620 #endif // CONFIG_VP9_HIGHBITDEPTH
623 if (bit_depth_
== VPX_BITS_8
) {
624 ref_txfm(coeff
, ref
, pitch_
);
625 ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff
, dst
, pitch_
));
627 #if CONFIG_VP9_HIGHBITDEPTH
628 ref_txfm(coeff
, CONVERT_TO_BYTEPTR(ref16
), pitch_
);
629 ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff
, CONVERT_TO_BYTEPTR(dst16
),
631 #endif // CONFIG_VP9_HIGHBITDEPTH
634 for (int j
= 0; j
< kNumCoeffs
; ++j
) {
635 #if CONFIG_VP9_HIGHBITDEPTH
636 const uint32_t diff
=
637 bit_depth_
== VPX_BITS_8
? dst
[j
] - ref
[j
] : dst16
[j
] - ref16
[j
];
639 const uint32_t diff
= dst
[j
] - ref
[j
];
640 #endif // CONFIG_VP9_HIGHBITDEPTH
641 const uint32_t error
= diff
* diff
;
643 << "Error: 16x16 IDCT Comparison has error " << error
644 << " at index " << j
;
651 vpx_bit_depth_t bit_depth_
;
653 FhtFunc fwd_txfm_ref
;
654 IhtFunc inv_txfm_ref
;
658 : public Trans16x16TestBase
,
659 public ::testing::TestWithParam
<Dct16x16Param
> {
661 virtual ~Trans16x16DCT() {}
663 virtual void SetUp() {
664 fwd_txfm_
= GET_PARAM(0);
665 inv_txfm_
= GET_PARAM(1);
666 tx_type_
= GET_PARAM(2);
667 bit_depth_
= GET_PARAM(3);
669 fwd_txfm_ref
= fdct16x16_ref
;
670 inv_txfm_ref
= idct16x16_ref
;
671 mask_
= (1 << bit_depth_
) - 1;
672 #if CONFIG_VP9_HIGHBITDEPTH
673 switch (bit_depth_
) {
675 inv_txfm_ref
= idct16x16_10_ref
;
678 inv_txfm_ref
= idct16x16_12_ref
;
681 inv_txfm_ref
= idct16x16_ref
;
685 inv_txfm_ref
= idct16x16_ref
;
688 virtual void TearDown() { libvpx_test::ClearSystemState(); }
691 void RunFwdTxfm(int16_t *in
, tran_low_t
*out
, int stride
) {
692 fwd_txfm_(in
, out
, stride
);
694 void RunInvTxfm(tran_low_t
*out
, uint8_t *dst
, int stride
) {
695 inv_txfm_(out
, dst
, stride
);
702 TEST_P(Trans16x16DCT
, AccuracyCheck
) {
706 TEST_P(Trans16x16DCT
, CoeffCheck
) {
710 TEST_P(Trans16x16DCT
, MemCheck
) {
714 TEST_P(Trans16x16DCT
, QuantCheck
) {
715 // Use maximally allowed quantization step sizes for DC and AC
716 // coefficients respectively.
717 RunQuantCheck(1336, 1828);
720 TEST_P(Trans16x16DCT
, InvAccuracyCheck
) {
721 RunInvAccuracyCheck();
725 : public Trans16x16TestBase
,
726 public ::testing::TestWithParam
<Ht16x16Param
> {
728 virtual ~Trans16x16HT() {}
730 virtual void SetUp() {
731 fwd_txfm_
= GET_PARAM(0);
732 inv_txfm_
= GET_PARAM(1);
733 tx_type_
= GET_PARAM(2);
734 bit_depth_
= GET_PARAM(3);
736 fwd_txfm_ref
= fht16x16_ref
;
737 inv_txfm_ref
= iht16x16_ref
;
738 mask_
= (1 << bit_depth_
) - 1;
739 #if CONFIG_VP9_HIGHBITDEPTH
740 switch (bit_depth_
) {
742 inv_txfm_ref
= iht16x16_10
;
745 inv_txfm_ref
= iht16x16_12
;
748 inv_txfm_ref
= iht16x16_ref
;
752 inv_txfm_ref
= iht16x16_ref
;
755 virtual void TearDown() { libvpx_test::ClearSystemState(); }
758 void RunFwdTxfm(int16_t *in
, tran_low_t
*out
, int stride
) {
759 fwd_txfm_(in
, out
, stride
, tx_type_
);
761 void RunInvTxfm(tran_low_t
*out
, uint8_t *dst
, int stride
) {
762 inv_txfm_(out
, dst
, stride
, tx_type_
);
769 TEST_P(Trans16x16HT
, AccuracyCheck
) {
773 TEST_P(Trans16x16HT
, CoeffCheck
) {
777 TEST_P(Trans16x16HT
, MemCheck
) {
781 TEST_P(Trans16x16HT
, QuantCheck
) {
782 // The encoder skips any non-DC intra prediction modes,
783 // when the quantization step size goes beyond 988.
784 RunQuantCheck(429, 729);
787 class InvTrans16x16DCT
788 : public Trans16x16TestBase
,
789 public ::testing::TestWithParam
<Idct16x16Param
> {
791 virtual ~InvTrans16x16DCT() {}
793 virtual void SetUp() {
794 ref_txfm_
= GET_PARAM(0);
795 inv_txfm_
= GET_PARAM(1);
796 thresh_
= GET_PARAM(2);
797 bit_depth_
= GET_PARAM(3);
799 mask_
= (1 << bit_depth_
) - 1;
801 virtual void TearDown() { libvpx_test::ClearSystemState(); }
804 void RunFwdTxfm(int16_t *in
, tran_low_t
*out
, int stride
) {}
805 void RunInvTxfm(tran_low_t
*out
, uint8_t *dst
, int stride
) {
806 inv_txfm_(out
, dst
, stride
);
814 TEST_P(InvTrans16x16DCT
, CompareReference
) {
815 CompareInvReference(ref_txfm_
, thresh_
);
818 using std::tr1::make_tuple
;
820 #if CONFIG_VP9_HIGHBITDEPTH
821 INSTANTIATE_TEST_CASE_P(
824 make_tuple(&vp9_highbd_fdct16x16_c
, &idct16x16_10
, 0, VPX_BITS_10
),
825 make_tuple(&vp9_highbd_fdct16x16_c
, &idct16x16_12
, 0, VPX_BITS_12
),
826 make_tuple(&vp9_fdct16x16_c
, &vp9_idct16x16_256_add_c
, 0, VPX_BITS_8
)));
828 INSTANTIATE_TEST_CASE_P(
831 make_tuple(&vp9_fdct16x16_c
, &vp9_idct16x16_256_add_c
, 0, VPX_BITS_8
)));
832 #endif // CONFIG_VP9_HIGHBITDEPTH
834 #if CONFIG_VP9_HIGHBITDEPTH
835 INSTANTIATE_TEST_CASE_P(
838 make_tuple(&vp9_highbd_fht16x16_c
, &iht16x16_10
, 0, VPX_BITS_10
),
839 make_tuple(&vp9_highbd_fht16x16_c
, &iht16x16_10
, 1, VPX_BITS_10
),
840 make_tuple(&vp9_highbd_fht16x16_c
, &iht16x16_10
, 2, VPX_BITS_10
),
841 make_tuple(&vp9_highbd_fht16x16_c
, &iht16x16_10
, 3, VPX_BITS_10
),
842 make_tuple(&vp9_highbd_fht16x16_c
, &iht16x16_12
, 0, VPX_BITS_12
),
843 make_tuple(&vp9_highbd_fht16x16_c
, &iht16x16_12
, 1, VPX_BITS_12
),
844 make_tuple(&vp9_highbd_fht16x16_c
, &iht16x16_12
, 2, VPX_BITS_12
),
845 make_tuple(&vp9_highbd_fht16x16_c
, &iht16x16_12
, 3, VPX_BITS_12
),
846 make_tuple(&vp9_fht16x16_c
, &vp9_iht16x16_256_add_c
, 0, VPX_BITS_8
),
847 make_tuple(&vp9_fht16x16_c
, &vp9_iht16x16_256_add_c
, 1, VPX_BITS_8
),
848 make_tuple(&vp9_fht16x16_c
, &vp9_iht16x16_256_add_c
, 2, VPX_BITS_8
),
849 make_tuple(&vp9_fht16x16_c
, &vp9_iht16x16_256_add_c
, 3, VPX_BITS_8
)));
851 INSTANTIATE_TEST_CASE_P(
854 make_tuple(&vp9_fht16x16_c
, &vp9_iht16x16_256_add_c
, 0, VPX_BITS_8
),
855 make_tuple(&vp9_fht16x16_c
, &vp9_iht16x16_256_add_c
, 1, VPX_BITS_8
),
856 make_tuple(&vp9_fht16x16_c
, &vp9_iht16x16_256_add_c
, 2, VPX_BITS_8
),
857 make_tuple(&vp9_fht16x16_c
, &vp9_iht16x16_256_add_c
, 3, VPX_BITS_8
)));
858 #endif // CONFIG_VP9_HIGHBITDEPTH
860 #if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
861 INSTANTIATE_TEST_CASE_P(
864 make_tuple(&vp9_fdct16x16_c
,
865 &vp9_idct16x16_256_add_neon
, 0, VPX_BITS_8
)));
868 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
869 INSTANTIATE_TEST_CASE_P(
872 make_tuple(&vp9_fdct16x16_sse2
,
873 &vp9_idct16x16_256_add_sse2
, 0, VPX_BITS_8
)));
874 INSTANTIATE_TEST_CASE_P(
877 make_tuple(&vp9_fht16x16_sse2
, &vp9_iht16x16_256_add_sse2
, 0,
879 make_tuple(&vp9_fht16x16_sse2
, &vp9_iht16x16_256_add_sse2
, 1,
881 make_tuple(&vp9_fht16x16_sse2
, &vp9_iht16x16_256_add_sse2
, 2,
883 make_tuple(&vp9_fht16x16_sse2
, &vp9_iht16x16_256_add_sse2
, 3,
885 #endif // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
887 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
888 INSTANTIATE_TEST_CASE_P(
891 make_tuple(&vp9_highbd_fdct16x16_sse2
,
892 &idct16x16_10
, 0, VPX_BITS_10
),
893 make_tuple(&vp9_highbd_fdct16x16_c
,
894 &idct16x16_256_add_10_sse2
, 0, VPX_BITS_10
),
895 make_tuple(&vp9_highbd_fdct16x16_sse2
,
896 &idct16x16_12
, 0, VPX_BITS_12
),
897 make_tuple(&vp9_highbd_fdct16x16_c
,
898 &idct16x16_256_add_12_sse2
, 0, VPX_BITS_12
),
899 make_tuple(&vp9_fdct16x16_sse2
,
900 &vp9_idct16x16_256_add_c
, 0, VPX_BITS_8
)));
901 INSTANTIATE_TEST_CASE_P(
904 make_tuple(&vp9_highbd_fht16x16_sse2
, &iht16x16_10
, 0, VPX_BITS_10
),
905 make_tuple(&vp9_highbd_fht16x16_sse2
, &iht16x16_10
, 1, VPX_BITS_10
),
906 make_tuple(&vp9_highbd_fht16x16_sse2
, &iht16x16_10
, 2, VPX_BITS_10
),
907 make_tuple(&vp9_highbd_fht16x16_sse2
, &iht16x16_10
, 3, VPX_BITS_10
),
908 make_tuple(&vp9_highbd_fht16x16_sse2
, &iht16x16_12
, 0, VPX_BITS_12
),
909 make_tuple(&vp9_highbd_fht16x16_sse2
, &iht16x16_12
, 1, VPX_BITS_12
),
910 make_tuple(&vp9_highbd_fht16x16_sse2
, &iht16x16_12
, 2, VPX_BITS_12
),
911 make_tuple(&vp9_highbd_fht16x16_sse2
, &iht16x16_12
, 3, VPX_BITS_12
),
912 make_tuple(&vp9_fht16x16_sse2
, &vp9_iht16x16_256_add_c
, 0, VPX_BITS_8
),
913 make_tuple(&vp9_fht16x16_sse2
, &vp9_iht16x16_256_add_c
, 1, VPX_BITS_8
),
914 make_tuple(&vp9_fht16x16_sse2
, &vp9_iht16x16_256_add_c
, 2, VPX_BITS_8
),
915 make_tuple(&vp9_fht16x16_sse2
, &vp9_iht16x16_256_add_c
, 3,
917 // Optimizations take effect at a threshold of 3155, so we use a value close to
918 // that to test both branches.
919 INSTANTIATE_TEST_CASE_P(
920 SSE2
, InvTrans16x16DCT
,
922 make_tuple(&idct16x16_10_add_10_c
,
923 &idct16x16_10_add_10_sse2
, 3167, VPX_BITS_10
),
924 make_tuple(&idct16x16_10
,
925 &idct16x16_256_add_10_sse2
, 3167, VPX_BITS_10
),
926 make_tuple(&idct16x16_10_add_12_c
,
927 &idct16x16_10_add_12_sse2
, 3167, VPX_BITS_12
),
928 make_tuple(&idct16x16_12
,
929 &idct16x16_256_add_12_sse2
, 3167, VPX_BITS_12
)));
930 #endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
932 #if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
933 INSTANTIATE_TEST_CASE_P(
936 make_tuple(&vp9_fdct16x16_c
,
937 &vp9_idct16x16_256_add_msa
, 0, VPX_BITS_8
)));
938 INSTANTIATE_TEST_CASE_P(
941 make_tuple(&vp9_fht16x16_c
, &vp9_iht16x16_256_add_msa
, 0, VPX_BITS_8
),
942 make_tuple(&vp9_fht16x16_c
, &vp9_iht16x16_256_add_msa
, 1, VPX_BITS_8
),
943 make_tuple(&vp9_fht16x16_c
, &vp9_iht16x16_256_add_msa
, 2, VPX_BITS_8
),
944 make_tuple(&vp9_fht16x16_c
, &vp9_iht16x16_256_add_msa
, 3, VPX_BITS_8
)));
945 #endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE