/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"

#include "vpx_ports/mem.h"
#include "vpx/vpx_integer.h"

#include "vpx_dsp/variance.h"
// 2-tap bilinear filter taps for each of the 8 eighth-pel offsets.
// Each pair of taps sums to 128 (1.0 in Q7), so filtering preserves DC.
static const uint8_t bilinear_filters[8][2] = {
  { 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 },
  { 64, 64 }, { 48, 80 },  { 32, 96 }, { 16, 112 },
};
// Returns the sum of squared differences between two 4x4 blocks of 8-bit
// pixels. Unlike the variance functions, no sum (mean) term is computed.
uint32_t vpx_get4x4sse_cs_c(const uint8_t *a, int a_stride, const uint8_t *b,
                            int b_stride) {
  int distortion = 0;
  int r, c;

  for (r = 0; r < 4; ++r) {
    for (c = 0; c < 4; ++c) {
      const int diff = a[c] - b[c];
      distortion += diff * diff;
    }

    a += a_stride;
    b += b_stride;
  }

  return distortion;
}
// Returns the sum of squares of a 16x16 macroblock of int16 values
// (256 samples, typically prediction residuals).
uint32_t vpx_get_mb_ss_c(const int16_t *a) {
  unsigned int i, sum = 0;

  for (i = 0; i < 256; ++i) {
    sum += a[i] * a[i];
  }

  return sum;
}
// 16x16 variance at the horizontal half-pel position: delegates to the
// generic sub-pel routine with xoffset=4 (1/2 pel in eighth-pel units),
// yoffset=0.
uint32_t vpx_variance_halfpixvar16x16_h_c(const uint8_t *a, int a_stride,
                                          const uint8_t *b, int b_stride,
                                          uint32_t *sse) {
  return vpx_sub_pixel_variance16x16_c(a, a_stride, 4, 0, b, b_stride, sse);
}
// 16x16 variance at the vertical half-pel position: xoffset=0, yoffset=4
// (1/2 pel in eighth-pel units).
uint32_t vpx_variance_halfpixvar16x16_v_c(const uint8_t *a, int a_stride,
                                          const uint8_t *b, int b_stride,
                                          uint32_t *sse) {
  return vpx_sub_pixel_variance16x16_c(a, a_stride, 0, 4, b, b_stride, sse);
}
// 16x16 variance at the diagonal half-pel position: xoffset=4, yoffset=4
// (1/2 pel in both directions, in eighth-pel units).
uint32_t vpx_variance_halfpixvar16x16_hv_c(const uint8_t *a, int a_stride,
                                           const uint8_t *b, int b_stride,
                                           uint32_t *sse) {
  return vpx_sub_pixel_variance16x16_c(a, a_stride, 4, 4, b, b_stride, sse);
}
// Computes the sum of differences (*sum) and sum of squared differences
// (*sse) between a w x h block in `a` and the co-located block in `b`.
// Callers derive variance as sse - sum^2 / (w*h).
static void variance(const uint8_t *a, int a_stride, const uint8_t *b,
                     int b_stride, int w, int h, uint32_t *sse, int *sum) {
  int i, j;

  *sum = 0;
  *sse = 0;

  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int diff = a[j] - b[j];
      *sum += diff;
      *sse += diff * diff;
    }

    a += a_stride;
    b += b_stride;
  }
}
100 // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
101 // or vertical direction to produce the filtered output block. Used to implement
102 // the first-pass of 2-D separable filter.
104 // Produces int16_t output to retain precision for the next pass. Two filter
105 // taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is
106 // applied horizontally (pixel_step = 1) or vertically (pixel_step = stride).
107 // It defines the offset required to move from one input to the next.
108 static void var_filter_block2d_bil_first_pass(const uint8_t *a
, uint16_t *b
,
109 unsigned int src_pixels_per_line
,
111 unsigned int output_height
,
112 unsigned int output_width
,
113 const uint8_t *filter
) {
116 for (i
= 0; i
< output_height
; ++i
) {
117 for (j
= 0; j
< output_width
; ++j
) {
118 b
[j
] = ROUND_POWER_OF_TWO((int)a
[0] * filter
[0] +
119 (int)a
[pixel_step
] * filter
[1],
125 a
+= src_pixels_per_line
- output_width
;
130 // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
131 // or vertical direction to produce the filtered output block. Used to implement
132 // the second-pass of 2-D separable filter.
134 // Requires 16-bit input as produced by filter_block2d_bil_first_pass. Two
135 // filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the
136 // filter is applied horizontally (pixel_step = 1) or vertically
137 // (pixel_step = stride). It defines the offset required to move from one input
138 // to the next. Output is 8-bit.
139 static void var_filter_block2d_bil_second_pass(const uint16_t *a
, uint8_t *b
,
140 unsigned int src_pixels_per_line
,
141 unsigned int pixel_step
,
142 unsigned int output_height
,
143 unsigned int output_width
,
144 const uint8_t *filter
) {
147 for (i
= 0; i
< output_height
; ++i
) {
148 for (j
= 0; j
< output_width
; ++j
) {
149 b
[j
] = ROUND_POWER_OF_TWO((int)a
[0] * filter
[0] +
150 (int)a
[pixel_step
] * filter
[1],
155 a
+= src_pixels_per_line
- output_width
;
// Defines vpx_varianceWxH_c: returns sse - sum^2 / (W*H) and writes the raw
// SSE through *sse. The sum*sum product is widened to int64_t before the
// divide to avoid overflow on large blocks.
#define VAR(W, H)                                                    \
  uint32_t vpx_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                     const uint8_t *b, int b_stride, \
                                     uint32_t *sse) {                \
    int sum;                                                         \
    variance(a, a_stride, b, b_stride, W, H, sse, &sum);             \
    return *sse - (((int64_t)sum * sum) / (W * H));                  \
  }
// Defines vpx_sub_pixel_varianceWxH_c: bilinearly interpolates the source to
// the (xoffset, yoffset) eighth-pel position (horizontal pass then vertical
// pass), then computes the variance against b.
#define SUBPIX_VAR(W, H)                                                \
  uint32_t vpx_sub_pixel_variance##W##x##H##_c(                         \
      const uint8_t *a, int a_stride, int xoffset, int yoffset,         \
      const uint8_t *b, int b_stride, uint32_t *sse) {                  \
    uint16_t fdata3[(H + 1) * W]; /* H+1 rows feed the vertical pass */ \
    uint8_t temp2[H * W];                                               \
                                                                        \
    var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \
                                      bilinear_filters[xoffset]);       \
    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,       \
                                       bilinear_filters[yoffset]);      \
                                                                        \
    return vpx_variance##W##x##H##_c(temp2, W, b, b_stride, sse);       \
  }
// Defines vpx_sub_pixel_avg_varianceWxH_c: like SUBPIX_VAR, but the
// interpolated block is first averaged with second_pred (compound
// prediction) before the variance is computed.
#define SUBPIX_AVG_VAR(W, H)                                            \
  uint32_t vpx_sub_pixel_avg_variance##W##x##H##_c(                     \
      const uint8_t *a, int a_stride, int xoffset, int yoffset,         \
      const uint8_t *b, int b_stride, uint32_t *sse,                    \
      const uint8_t *second_pred) {                                     \
    uint16_t fdata3[(H + 1) * W];                                       \
    uint8_t temp2[H * W];                                               \
    DECLARE_ALIGNED(16, uint8_t, temp3[H * W]);                         \
                                                                        \
    var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \
                                      bilinear_filters[xoffset]);       \
    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,       \
                                       bilinear_filters[yoffset]);      \
                                                                        \
    vpx_comp_avg_pred(temp3, second_pred, W, H, temp2, W);              \
                                                                        \
    return vpx_variance##W##x##H##_c(temp3, W, b, b_stride, sse);       \
  }
/* Identical to the variance call except it takes an additional parameter,
 * sum, and returns that value using pass-by-reference instead of returning
 * sse - sum^2 / (w*h).
 */
#define GET_VAR(W, H)                                          \
  void vpx_get##W##x##H##var_c(const uint8_t *a, int a_stride, \
                               const uint8_t *b, int b_stride, \
                               uint32_t *sse, int *sum) {      \
    variance(a, a_stride, b, b_stride, W, H, sse, sum);        \
  }
/* Identical to the variance call except it does not calculate the
 * sse - sum^2 / (w*h) term: it returns the raw sse in addition to writing it
 * through the passed-in pointer.
 */
#define MSE(W, H)                                               \
  uint32_t vpx_mse##W##x##H##_c(const uint8_t *a, int a_stride, \
                                const uint8_t *b, int b_stride, \
                                uint32_t *sse) {                \
    int sum;                                                    \
    variance(a, a_stride, b, b_stride, W, H, sse, &sum);        \
    return *sse;                                                \
  }
/* All three forms of the variance are available in the same sizes. */
#define VARIANCES(W, H) \
  VAR(W, H)             \
  SUBPIX_VAR(W, H)      \
  SUBPIX_AVG_VAR(W, H)
// Produces a compound prediction: each output pixel is the rounded average
// of the corresponding pred and ref pixels. comp_pred and pred are packed
// (stride == width); ref uses ref_stride.
void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
                         int height, const uint8_t *ref, int ref_stride) {
  int i, j;

  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      const int tmp = pred[j] + ref[j];
      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}
#if CONFIG_VP9_HIGHBITDEPTH
// High-bit-depth core: computes sum and sse between two w x h blocks of
// uint16 samples (passed as CONVERT_TO_* packed uint8_t pointers).
// Accumulates in 64 bits so 12-bit 64x64 blocks cannot overflow.
static void highbd_variance64(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride, int w, int h,
                              uint64_t *sse, uint64_t *sum) {
  int i, j;

  uint16_t *a = CONVERT_TO_SHORTPTR(a8);
  uint16_t *b = CONVERT_TO_SHORTPTR(b8);
  *sum = 0;
  *sse = 0;

  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int diff = a[j] - b[j];
      *sum += diff;
      *sse += diff * diff;
    }
    a += a_stride;
    b += b_stride;
  }
}
// 8-bit-depth wrapper around highbd_variance64: no rescaling is needed, the
// 64-bit accumulators are simply narrowed.
static void highbd_8_variance(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride, int w, int h,
                              uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  uint64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)sse_long;
  *sum = (int)sum_long;
}
// 10-bit-depth wrapper: rescales the accumulators back to the 8-bit domain
// (sse by 2 extra bits squared -> >>4, sum by 2 extra bits -> >>2), with
// rounding.
static void highbd_10_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride, int w, int h,
                               uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  uint64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4);
  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2);
}
// 12-bit-depth wrapper: rescales the accumulators back to the 8-bit domain
// (sse by 4 extra bits squared -> >>8, sum by 4 extra bits -> >>4), with
// rounding.
static void highbd_12_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride, int w, int h,
                               uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  uint64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 8);
  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4);
}
// Defines the 8-, 10- and 12-bit vpx_highbd_*_varianceWxH_c entry points.
// Each returns sse - sum^2 / (W*H), widening sum*sum to int64_t first.
#define HIGHBD_VAR(W, H)                                            \
  uint32_t vpx_highbd_8_variance##W##x##H##_c(                      \
      const uint8_t *a, int a_stride, const uint8_t *b,             \
      int b_stride, uint32_t *sse) {                                \
    int sum;                                                        \
    highbd_8_variance(a, a_stride, b, b_stride, W, H, sse, &sum);   \
    return *sse - (((int64_t)sum * sum) / (W * H));                 \
  }                                                                 \
                                                                    \
  uint32_t vpx_highbd_10_variance##W##x##H##_c(                     \
      const uint8_t *a, int a_stride, const uint8_t *b,             \
      int b_stride, uint32_t *sse) {                                \
    int sum;                                                        \
    highbd_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum);  \
    return *sse - (((int64_t)sum * sum) / (W * H));                 \
  }                                                                 \
                                                                    \
  uint32_t vpx_highbd_12_variance##W##x##H##_c(                     \
      const uint8_t *a, int a_stride, const uint8_t *b,             \
      int b_stride, uint32_t *sse) {                                \
    int sum;                                                        \
    highbd_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum);  \
    return *sse - (((int64_t)sum * sum) / (W * H));                 \
  }
// Defines the 8-, 10- and 12-bit vpx_highbd_*_getSxSvar_c entry points:
// variance helpers that return both sse and sum by reference.
#define HIGHBD_GET_VAR(S)                                                    \
  void vpx_highbd_8_get##S##x##S##var_c(const uint8_t *src, int src_stride,  \
                                        const uint8_t *ref, int ref_stride,  \
                                        uint32_t *sse, int *sum) {           \
    highbd_8_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);     \
  }                                                                          \
                                                                             \
  void vpx_highbd_10_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
                                         const uint8_t *ref, int ref_stride, \
                                         uint32_t *sse, int *sum) {          \
    highbd_10_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);    \
  }                                                                          \
                                                                             \
  void vpx_highbd_12_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
                                         const uint8_t *ref, int ref_stride, \
                                         uint32_t *sse, int *sum) {          \
    highbd_12_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);    \
  }
// Defines the 8-, 10- and 12-bit vpx_highbd_*_mseWxH_c entry points: raw SSE
// only, no sum^2/(w*h) term.
#define HIGHBD_MSE(W, H)                                                     \
  uint32_t vpx_highbd_8_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
                                         const uint8_t *ref, int ref_stride, \
                                         uint32_t *sse) {                    \
    int sum;                                                                 \
    highbd_8_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);    \
    return *sse;                                                             \
  }                                                                          \
                                                                             \
  uint32_t vpx_highbd_10_mse##W##x##H##_c(                                   \
      const uint8_t *src, int src_stride, const uint8_t *ref,                \
      int ref_stride, uint32_t *sse) {                                       \
    int sum;                                                                 \
    highbd_10_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);   \
    return *sse;                                                             \
  }                                                                          \
                                                                             \
  uint32_t vpx_highbd_12_mse##W##x##H##_c(                                   \
      const uint8_t *src, int src_stride, const uint8_t *ref,                \
      int ref_stride, uint32_t *sse) {                                       \
    int sum;                                                                 \
    highbd_12_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);   \
    return *sse;                                                             \
  }
408 static void highbd_var_filter_block2d_bil_first_pass(
409 const uint8_t *src_ptr8
,
410 uint16_t *output_ptr
,
411 unsigned int src_pixels_per_line
,
413 unsigned int output_height
,
414 unsigned int output_width
,
415 const uint8_t *filter
) {
417 uint16_t *src_ptr
= CONVERT_TO_SHORTPTR(src_ptr8
);
418 for (i
= 0; i
< output_height
; ++i
) {
419 for (j
= 0; j
< output_width
; ++j
) {
421 ROUND_POWER_OF_TWO((int)src_ptr
[0] * filter
[0] +
422 (int)src_ptr
[pixel_step
] * filter
[1],
429 src_ptr
+= src_pixels_per_line
- output_width
;
430 output_ptr
+= output_width
;
434 static void highbd_var_filter_block2d_bil_second_pass(
435 const uint16_t *src_ptr
,
436 uint16_t *output_ptr
,
437 unsigned int src_pixels_per_line
,
438 unsigned int pixel_step
,
439 unsigned int output_height
,
440 unsigned int output_width
,
441 const uint8_t *filter
) {
444 for (i
= 0; i
< output_height
; ++i
) {
445 for (j
= 0; j
< output_width
; ++j
) {
447 ROUND_POWER_OF_TWO((int)src_ptr
[0] * filter
[0] +
448 (int)src_ptr
[pixel_step
] * filter
[1],
453 src_ptr
+= src_pixels_per_line
- output_width
;
454 output_ptr
+= output_width
;
// Defines the 8-, 10- and 12-bit vpx_highbd_*_sub_pixel_varianceWxH_c entry
// points: interpolate to the (xoffset, yoffset) eighth-pel position, then
// compute variance against dst.
#define HIGHBD_SUBPIX_VAR(W, H)                                             \
  uint32_t vpx_highbd_8_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,         \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                  \
    uint16_t fdata3[(H + 1) * W];                                           \
    uint16_t temp2[H * W];                                                  \
                                                                            \
    highbd_var_filter_block2d_bil_first_pass(                               \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]);   \
    highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,    \
                                              bilinear_filters[yoffset]);   \
                                                                            \
    return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
                                              dst, dst_stride, sse);        \
  }                                                                         \
                                                                            \
  uint32_t vpx_highbd_10_sub_pixel_variance##W##x##H##_c(                   \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,         \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                  \
    uint16_t fdata3[(H + 1) * W];                                           \
    uint16_t temp2[H * W];                                                  \
                                                                            \
    highbd_var_filter_block2d_bil_first_pass(                               \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]);   \
    highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,    \
                                              bilinear_filters[yoffset]);   \
                                                                            \
    return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2),   \
                                               W, dst, dst_stride, sse);    \
  }                                                                         \
                                                                            \
  uint32_t vpx_highbd_12_sub_pixel_variance##W##x##H##_c(                   \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,         \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                  \
    uint16_t fdata3[(H + 1) * W];                                           \
    uint16_t temp2[H * W];                                                  \
                                                                            \
    highbd_var_filter_block2d_bil_first_pass(                               \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]);   \
    highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,    \
                                              bilinear_filters[yoffset]);   \
                                                                            \
    return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2),   \
                                               W, dst, dst_stride, sse);    \
  }
// Defines the 8-, 10- and 12-bit vpx_highbd_*_sub_pixel_avg_varianceWxH_c
// entry points: like HIGHBD_SUBPIX_VAR, but the interpolated block is first
// averaged with second_pred (compound prediction).
#define HIGHBD_SUBPIX_AVG_VAR(W, H)                                         \
  uint32_t vpx_highbd_8_sub_pixel_avg_variance##W##x##H##_c(                \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,         \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                    \
      const uint8_t *second_pred) {                                         \
    uint16_t fdata3[(H + 1) * W];                                           \
    uint16_t temp2[H * W];                                                  \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                            \
                                                                            \
    highbd_var_filter_block2d_bil_first_pass(                               \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]);   \
    highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,    \
                                              bilinear_filters[yoffset]);   \
                                                                            \
    vpx_highbd_comp_avg_pred(temp3, second_pred, W, H,                      \
                             CONVERT_TO_BYTEPTR(temp2), W);                 \
                                                                            \
    return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
                                              dst, dst_stride, sse);        \
  }                                                                         \
                                                                            \
  uint32_t vpx_highbd_10_sub_pixel_avg_variance##W##x##H##_c(               \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,         \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                    \
      const uint8_t *second_pred) {                                         \
    uint16_t fdata3[(H + 1) * W];                                           \
    uint16_t temp2[H * W];                                                  \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                            \
                                                                            \
    highbd_var_filter_block2d_bil_first_pass(                               \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]);   \
    highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,    \
                                              bilinear_filters[yoffset]);   \
                                                                            \
    vpx_highbd_comp_avg_pred(temp3, second_pred, W, H,                      \
                             CONVERT_TO_BYTEPTR(temp2), W);                 \
                                                                            \
    return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3),   \
                                               W, dst, dst_stride, sse);    \
  }                                                                         \
                                                                            \
  uint32_t vpx_highbd_12_sub_pixel_avg_variance##W##x##H##_c(               \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,         \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                    \
      const uint8_t *second_pred) {                                         \
    uint16_t fdata3[(H + 1) * W];                                           \
    uint16_t temp2[H * W];                                                  \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                            \
                                                                            \
    highbd_var_filter_block2d_bil_first_pass(                               \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]);   \
    highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,    \
                                              bilinear_filters[yoffset]);   \
                                                                            \
    vpx_highbd_comp_avg_pred(temp3, second_pred, W, H,                      \
                             CONVERT_TO_BYTEPTR(temp2), W);                 \
                                                                            \
    return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3),   \
                                               W, dst, dst_stride, sse);    \
  }
577 /* All three forms of the variance are available in the same sizes. */
578 #define HIGHBD_VARIANCES(W, H) \
580 HIGHBD_SUBPIX_VAR(W, H) \
581 HIGHBD_SUBPIX_AVG_VAR(W, H)
583 HIGHBD_VARIANCES(64, 64)
584 HIGHBD_VARIANCES(64, 32)
585 HIGHBD_VARIANCES(32, 64)
586 HIGHBD_VARIANCES(32, 32)
587 HIGHBD_VARIANCES(32, 16)
588 HIGHBD_VARIANCES(16, 32)
589 HIGHBD_VARIANCES(16, 16)
590 HIGHBD_VARIANCES(16, 8)
591 HIGHBD_VARIANCES(8, 16)
592 HIGHBD_VARIANCES(8, 8)
593 HIGHBD_VARIANCES(8, 4)
594 HIGHBD_VARIANCES(4, 8)
595 HIGHBD_VARIANCES(4, 4)
// High-bit-depth compound prediction: each output sample is the rounded
// average of the corresponding pred and ref samples. pred8/ref8 are
// CONVERT_TO_* packed pointers to uint16 data; comp_pred and pred are packed
// (stride == width), ref uses ref_stride.
void vpx_highbd_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred8,
                              int width, int height, const uint8_t *ref8,
                              int ref_stride) {
  int i, j;
  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      const int tmp = pred[j] + ref[j];
      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH