2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
12 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
14 #include "config/aom_config.h"
15 #include "config/aom_dsp_rtcd.h"
16 #include "config/av1_rtcd.h"
18 #include "aom_dsp/aom_dsp_common.h"
20 #include "av1/common/enums.h"
22 #include "test/acm_random.h"
23 #include "test/function_equivalence_test.h"
24 #include "test/register_state_check.h"
// Number of bits the helpers in this file shift a mask-weighted sum down by
// (the shift passed to ROUND_POWER_OF_TWO).
26 #define WEDGE_WEIGHT_BITS 6
// Full mask weight (1 << WEDGE_WEIGHT_BITS). The helpers in this file weight
// one input by m[i] and the other by MAX_MASK_VALUE - m[i], so the two
// weights always sum to this value.
27 #define MAX_MASK_VALUE (1 << (WEDGE_WEIGHT_BITS))
29 using libaom_test::ACMRandom
;
30 using libaom_test::FunctionEquivalenceTest
;
34 static const int16_t kInt13Max
= (1 << 12) - 1;
36 //////////////////////////////////////////////////////////////////////////////
37 // av1_wedge_sse_from_residuals - functionality
38 //////////////////////////////////////////////////////////////////////////////
40 class WedgeUtilsSSEFuncTest
: public testing::Test
{
42 WedgeUtilsSSEFuncTest() : rng_(ACMRandom::DeterministicSeed()) {}
44 static const int kIterations
= 1000;
49 static void equiv_blend_residuals(int16_t *r
, const int16_t *r0
,
50 const int16_t *r1
, const uint8_t *m
, int N
) {
51 for (int i
= 0; i
< N
; i
++) {
52 const int32_t m0
= m
[i
];
53 const int32_t m1
= MAX_MASK_VALUE
- m0
;
54 const int16_t R
= m0
* r0
[i
] + m1
* r1
[i
];
55 // Note that this rounding is designed to match the result
56 // you would get when actually blending the 2 predictors and computing
58 r
[i
] = ROUND_POWER_OF_TWO(R
- 1, WEDGE_WEIGHT_BITS
);
62 static uint64_t equiv_sse_from_residuals(const int16_t *r0
, const int16_t *r1
,
63 const uint8_t *m
, int N
) {
65 for (int i
= 0; i
< N
; i
++) {
66 const int32_t m0
= m
[i
];
67 const int32_t m1
= MAX_MASK_VALUE
- m0
;
68 const int16_t R
= m0
* r0
[i
] + m1
* r1
[i
];
69 const int32_t r
= ROUND_POWER_OF_TWO(R
- 1, WEDGE_WEIGHT_BITS
);
75 TEST_F(WedgeUtilsSSEFuncTest
, ResidualBlendingEquiv
) {
76 DECLARE_ALIGNED(32, uint8_t, s
[MAX_SB_SQUARE
]);
77 DECLARE_ALIGNED(32, uint8_t, p0
[MAX_SB_SQUARE
]);
78 DECLARE_ALIGNED(32, uint8_t, p1
[MAX_SB_SQUARE
]);
79 DECLARE_ALIGNED(32, uint8_t, p
[MAX_SB_SQUARE
]);
81 DECLARE_ALIGNED(32, int16_t, r0
[MAX_SB_SQUARE
]);
82 DECLARE_ALIGNED(32, int16_t, r1
[MAX_SB_SQUARE
]);
83 DECLARE_ALIGNED(32, int16_t, r_ref
[MAX_SB_SQUARE
]);
84 DECLARE_ALIGNED(32, int16_t, r_tst
[MAX_SB_SQUARE
]);
85 DECLARE_ALIGNED(32, uint8_t, m
[MAX_SB_SQUARE
]);
87 for (int iter
= 0; iter
< kIterations
&& !HasFatalFailure(); ++iter
) {
88 for (int i
= 0; i
< MAX_SB_SQUARE
; ++i
) {
90 m
[i
] = rng_(MAX_MASK_VALUE
+ 1);
93 const int w
= 1 << (rng_(MAX_SB_SIZE_LOG2
+ 1 - 3) + 3);
94 const int h
= 1 << (rng_(MAX_SB_SIZE_LOG2
+ 1 - 3) + 3);
97 for (int j
= 0; j
< N
; j
++) {
98 p0
[j
] = clamp(s
[j
] + rng_(33) - 16, 0, UINT8_MAX
);
99 p1
[j
] = clamp(s
[j
] + rng_(33) - 16, 0, UINT8_MAX
);
102 aom_blend_a64_mask(p
, w
, p0
, w
, p1
, w
, m
, w
, w
, h
, 0, 0);
104 aom_subtract_block(h
, w
, r0
, w
, s
, w
, p0
, w
);
105 aom_subtract_block(h
, w
, r1
, w
, s
, w
, p1
, w
);
107 aom_subtract_block(h
, w
, r_ref
, w
, s
, w
, p
, w
);
108 equiv_blend_residuals(r_tst
, r0
, r1
, m
, N
);
110 for (int i
= 0; i
< N
; ++i
) ASSERT_EQ(r_ref
[i
], r_tst
[i
]);
112 uint64_t ref_sse
= aom_sum_squares_i16(r_ref
, N
);
113 uint64_t tst_sse
= equiv_sse_from_residuals(r0
, r1
, m
, N
);
115 ASSERT_EQ(ref_sse
, tst_sse
);
119 static uint64_t sse_from_residuals(const int16_t *r0
, const int16_t *r1
,
120 const uint8_t *m
, int N
) {
122 for (int i
= 0; i
< N
; i
++) {
123 const int32_t m0
= m
[i
];
124 const int32_t m1
= MAX_MASK_VALUE
- m0
;
125 const int32_t r
= m0
* r0
[i
] + m1
* r1
[i
];
128 return ROUND_POWER_OF_TWO(acc
, 2 * WEDGE_WEIGHT_BITS
);
131 TEST_F(WedgeUtilsSSEFuncTest
, ResidualBlendingMethod
) {
132 DECLARE_ALIGNED(32, int16_t, r0
[MAX_SB_SQUARE
]);
133 DECLARE_ALIGNED(32, int16_t, r1
[MAX_SB_SQUARE
]);
134 DECLARE_ALIGNED(32, int16_t, d
[MAX_SB_SQUARE
]);
135 DECLARE_ALIGNED(32, uint8_t, m
[MAX_SB_SQUARE
]);
137 for (int iter
= 0; iter
< kIterations
&& !HasFatalFailure(); ++iter
) {
138 for (int i
= 0; i
< MAX_SB_SQUARE
; ++i
) {
139 r1
[i
] = rng_(2 * INT8_MAX
- 2 * INT8_MIN
+ 1) + 2 * INT8_MIN
;
140 d
[i
] = rng_(2 * INT8_MAX
- 2 * INT8_MIN
+ 1) + 2 * INT8_MIN
;
141 m
[i
] = rng_(MAX_MASK_VALUE
+ 1);
144 const int N
= 64 * (rng_(MAX_SB_SQUARE
/ 64) + 1);
146 for (int i
= 0; i
< N
; i
++) r0
[i
] = r1
[i
] + d
[i
];
148 const uint64_t ref_res
= sse_from_residuals(r0
, r1
, m
, N
);
149 const uint64_t tst_res
= av1_wedge_sse_from_residuals(r1
, d
, m
, N
);
151 ASSERT_EQ(ref_res
, tst_res
);
155 //////////////////////////////////////////////////////////////////////////////
156 // av1_wedge_sse_from_residuals - optimizations
157 //////////////////////////////////////////////////////////////////////////////
159 typedef uint64_t (*FSSE
)(const int16_t *r1
, const int16_t *d
, const uint8_t *m
,
161 typedef libaom_test::FuncParam
<FSSE
> TestFuncsFSSE
;
163 class WedgeUtilsSSEOptTest
: public FunctionEquivalenceTest
<FSSE
> {
165 static const int kIterations
= 10000;
168 TEST_P(WedgeUtilsSSEOptTest
, RandomValues
) {
169 DECLARE_ALIGNED(32, int16_t, r1
[MAX_SB_SQUARE
]);
170 DECLARE_ALIGNED(32, int16_t, d
[MAX_SB_SQUARE
]);
171 DECLARE_ALIGNED(32, uint8_t, m
[MAX_SB_SQUARE
]);
173 for (int iter
= 0; iter
< kIterations
&& !HasFatalFailure(); ++iter
) {
174 for (int i
= 0; i
< MAX_SB_SQUARE
; ++i
) {
175 r1
[i
] = rng_(2 * kInt13Max
+ 1) - kInt13Max
;
176 d
[i
] = rng_(2 * kInt13Max
+ 1) - kInt13Max
;
177 m
[i
] = rng_(MAX_MASK_VALUE
+ 1);
180 const int N
= 64 * (rng_(MAX_SB_SQUARE
/ 64) + 1);
182 const uint64_t ref_res
= params_
.ref_func(r1
, d
, m
, N
);
184 ASM_REGISTER_STATE_CHECK(tst_res
= params_
.tst_func(r1
, d
, m
, N
));
186 ASSERT_EQ(ref_res
, tst_res
);
190 TEST_P(WedgeUtilsSSEOptTest
, ExtremeValues
) {
191 DECLARE_ALIGNED(32, int16_t, r1
[MAX_SB_SQUARE
]);
192 DECLARE_ALIGNED(32, int16_t, d
[MAX_SB_SQUARE
]);
193 DECLARE_ALIGNED(32, uint8_t, m
[MAX_SB_SQUARE
]);
195 for (int iter
= 0; iter
< kIterations
&& !HasFatalFailure(); ++iter
) {
197 for (int i
= 0; i
< MAX_SB_SQUARE
; ++i
) r1
[i
] = kInt13Max
;
199 for (int i
= 0; i
< MAX_SB_SQUARE
; ++i
) r1
[i
] = -kInt13Max
;
203 for (int i
= 0; i
< MAX_SB_SQUARE
; ++i
) d
[i
] = kInt13Max
;
205 for (int i
= 0; i
< MAX_SB_SQUARE
; ++i
) d
[i
] = -kInt13Max
;
208 for (int i
= 0; i
< MAX_SB_SQUARE
; ++i
) m
[i
] = MAX_MASK_VALUE
;
210 const int N
= 64 * (rng_(MAX_SB_SQUARE
/ 64) + 1);
212 const uint64_t ref_res
= params_
.ref_func(r1
, d
, m
, N
);
214 ASM_REGISTER_STATE_CHECK(tst_res
= params_
.tst_func(r1
, d
, m
, N
));
216 ASSERT_EQ(ref_res
, tst_res
);
220 //////////////////////////////////////////////////////////////////////////////
221 // av1_wedge_sign_from_residuals
222 //////////////////////////////////////////////////////////////////////////////
// Signature of the functions compared by WedgeUtilsSignOptTest (the
// av1_wedge_sign_from_residuals_* variants it is instantiated with):
// takes the row ds, the mask m, the element count N and a 64-bit limit,
// and returns an int.
224 typedef int (*FSign
)(const int16_t *ds
, const uint8_t *m
, int N
, int64_t limit
);
// Parameter type carrying a pair of FSign functions; the tests read them as
// params_.ref_func and params_.tst_func.
225 typedef libaom_test::FuncParam
<FSign
> TestFuncsFSign
;
227 class WedgeUtilsSignOptTest
: public FunctionEquivalenceTest
<FSign
> {
229 static const int kIterations
= 10000;
230 static const int kMaxSize
= 8196; // Size limited by SIMD implementation.
233 TEST_P(WedgeUtilsSignOptTest
, RandomValues
) {
234 DECLARE_ALIGNED(32, int16_t, r0
[MAX_SB_SQUARE
]);
235 DECLARE_ALIGNED(32, int16_t, r1
[MAX_SB_SQUARE
]);
236 DECLARE_ALIGNED(32, int16_t, ds
[MAX_SB_SQUARE
]);
237 DECLARE_ALIGNED(32, uint8_t, m
[MAX_SB_SQUARE
]);
239 for (int iter
= 0; iter
< kIterations
&& !HasFatalFailure(); ++iter
) {
240 for (int i
= 0; i
< MAX_SB_SQUARE
; ++i
) {
241 r0
[i
] = rng_(2 * kInt13Max
+ 1) - kInt13Max
;
242 r1
[i
] = rng_(2 * kInt13Max
+ 1) - kInt13Max
;
243 m
[i
] = rng_(MAX_MASK_VALUE
+ 1);
246 const int maxN
= AOMMIN(kMaxSize
, MAX_SB_SQUARE
);
247 const int N
= 64 * (rng_(maxN
/ 64 - 1) + 1);
250 limit
= (int64_t)aom_sum_squares_i16(r0
, N
);
251 limit
-= (int64_t)aom_sum_squares_i16(r1
, N
);
252 limit
*= (1 << WEDGE_WEIGHT_BITS
) / 2;
254 for (int i
= 0; i
< N
; i
++)
255 ds
[i
] = clamp(r0
[i
] * r0
[i
] - r1
[i
] * r1
[i
], INT16_MIN
, INT16_MAX
);
257 const int ref_res
= params_
.ref_func(ds
, m
, N
, limit
);
259 ASM_REGISTER_STATE_CHECK(tst_res
= params_
.tst_func(ds
, m
, N
, limit
));
261 ASSERT_EQ(ref_res
, tst_res
);
265 TEST_P(WedgeUtilsSignOptTest
, ExtremeValues
) {
266 DECLARE_ALIGNED(32, int16_t, r0
[MAX_SB_SQUARE
]);
267 DECLARE_ALIGNED(32, int16_t, r1
[MAX_SB_SQUARE
]);
268 DECLARE_ALIGNED(32, int16_t, ds
[MAX_SB_SQUARE
]);
269 DECLARE_ALIGNED(32, uint8_t, m
[MAX_SB_SQUARE
]);
271 for (int iter
= 0; iter
< kIterations
&& !HasFatalFailure(); ++iter
) {
274 for (int i
= 0; i
< MAX_SB_SQUARE
; ++i
) {
280 for (int i
= 0; i
< MAX_SB_SQUARE
; ++i
) {
286 for (int i
= 0; i
< MAX_SB_SQUARE
; ++i
) {
292 for (int i
= 0; i
< MAX_SB_SQUARE
; ++i
) {
299 for (int i
= 0; i
< MAX_SB_SQUARE
; ++i
) m
[i
] = MAX_MASK_VALUE
;
301 const int maxN
= AOMMIN(kMaxSize
, MAX_SB_SQUARE
);
302 const int N
= 64 * (rng_(maxN
/ 64 - 1) + 1);
305 limit
= (int64_t)aom_sum_squares_i16(r0
, N
);
306 limit
-= (int64_t)aom_sum_squares_i16(r1
, N
);
307 limit
*= (1 << WEDGE_WEIGHT_BITS
) / 2;
309 for (int i
= 0; i
< N
; i
++)
310 ds
[i
] = clamp(r0
[i
] * r0
[i
] - r1
[i
] * r1
[i
], INT16_MIN
, INT16_MAX
);
312 const int ref_res
= params_
.ref_func(ds
, m
, N
, limit
);
314 ASM_REGISTER_STATE_CHECK(tst_res
= params_
.tst_func(ds
, m
, N
, limit
));
316 ASSERT_EQ(ref_res
, tst_res
);
320 //////////////////////////////////////////////////////////////////////////////
321 // av1_wedge_compute_delta_squares
322 //////////////////////////////////////////////////////////////////////////////
// Signature of the functions compared by WedgeUtilsDeltaSquaresOptTest (the
// av1_wedge_compute_delta_squares_* variants it is instantiated with):
// writes into d from the inputs a and b, for N elements, and returns nothing.
324 typedef void (*FDS
)(int16_t *d
, const int16_t *a
, const int16_t *b
, int N
);
// Parameter type carrying a pair of FDS functions; the tests read them as
// params_.ref_func and params_.tst_func.
325 typedef libaom_test::FuncParam
<FDS
> TestFuncsFDS
;
327 class WedgeUtilsDeltaSquaresOptTest
: public FunctionEquivalenceTest
<FDS
> {
329 static const int kIterations
= 10000;
332 TEST_P(WedgeUtilsDeltaSquaresOptTest
, RandomValues
) {
333 DECLARE_ALIGNED(32, int16_t, a
[MAX_SB_SQUARE
]);
334 DECLARE_ALIGNED(32, int16_t, b
[MAX_SB_SQUARE
]);
335 DECLARE_ALIGNED(32, int16_t, d_ref
[MAX_SB_SQUARE
]);
336 DECLARE_ALIGNED(32, int16_t, d_tst
[MAX_SB_SQUARE
]);
338 for (int iter
= 0; iter
< kIterations
&& !HasFatalFailure(); ++iter
) {
339 for (int i
= 0; i
< MAX_SB_SQUARE
; ++i
) {
340 a
[i
] = rng_
.Rand16();
341 b
[i
] = rng_(2 * INT16_MAX
+ 1) - INT16_MAX
;
344 const int N
= 64 * (rng_(MAX_SB_SQUARE
/ 64) + 1);
346 memset(&d_ref
, INT16_MAX
, sizeof(d_ref
));
347 memset(&d_tst
, INT16_MAX
, sizeof(d_tst
));
349 params_
.ref_func(d_ref
, a
, b
, N
);
350 ASM_REGISTER_STATE_CHECK(params_
.tst_func(d_tst
, a
, b
, N
));
352 for (int i
= 0; i
< MAX_SB_SQUARE
; ++i
) ASSERT_EQ(d_ref
[i
], d_tst
[i
]);
357 INSTANTIATE_TEST_CASE_P(
358 SSE2
, WedgeUtilsSSEOptTest
,
359 ::testing::Values(TestFuncsFSSE(av1_wedge_sse_from_residuals_c
,
360 av1_wedge_sse_from_residuals_sse2
)));
362 INSTANTIATE_TEST_CASE_P(
363 SSE2
, WedgeUtilsSignOptTest
,
364 ::testing::Values(TestFuncsFSign(av1_wedge_sign_from_residuals_c
,
365 av1_wedge_sign_from_residuals_sse2
)));
367 INSTANTIATE_TEST_CASE_P(
368 SSE2
, WedgeUtilsDeltaSquaresOptTest
,
369 ::testing::Values(TestFuncsFDS(av1_wedge_compute_delta_squares_c
,
370 av1_wedge_compute_delta_squares_sse2
)));
374 INSTANTIATE_TEST_CASE_P(
375 AVX2
, WedgeUtilsSSEOptTest
,
376 ::testing::Values(TestFuncsFSSE(av1_wedge_sse_from_residuals_sse2
,
377 av1_wedge_sse_from_residuals_avx2
)));
379 INSTANTIATE_TEST_CASE_P(
380 AVX2
, WedgeUtilsSignOptTest
,
381 ::testing::Values(TestFuncsFSign(av1_wedge_sign_from_residuals_sse2
,
382 av1_wedge_sign_from_residuals_avx2
)));
384 INSTANTIATE_TEST_CASE_P(
385 AVX2
, WedgeUtilsDeltaSquaresOptTest
,
386 ::testing::Values(TestFuncsFDS(av1_wedge_compute_delta_squares_sse2
,
387 av1_wedge_compute_delta_squares_avx2
)));