/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"

#include "aom/aom_integer.h"
#include "aom_ports/mem.h"

#include "aom_dsp/aom_filter.h"
#include "aom_dsp/blend.h"
#include "aom_dsp/variance.h"

#include "av1/common/filter.h"
#include "av1/common/reconinter.h"

uint32_t aom_get4x4sse_cs_c(const uint8_t *a, int a_stride, const uint8_t *b,
                            int b_stride) {
  int distortion = 0;
  int r, c;

  for (r = 0; r < 4; ++r) {
    for (c = 0; c < 4; ++c) {
      int diff = a[c] - b[c];
      distortion += diff * diff;
    }

    a += a_stride;
    b += b_stride;
  }

  return distortion;
}

uint32_t aom_get_mb_ss_c(const int16_t *a) {
  unsigned int i, sum = 0;

  for (i = 0; i < 256; ++i) {
    sum += a[i] * a[i];
  }

  return sum;
}

static void variance(const uint8_t *a, int a_stride, const uint8_t *b,
                     int b_stride, int w, int h, uint32_t *sse, int *sum) {
  int i, j;

  *sum = 0;
  *sse = 0;

  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int diff = a[j] - b[j];
      *sum += diff;
      *sse += diff * diff;
    }

    a += a_stride;
    b += b_stride;
  }
}

uint32_t aom_sse_odd_size(const uint8_t *a, int a_stride, const uint8_t *b,
                          int b_stride, int w, int h) {
  uint32_t sse;
  int sum;
  variance(a, a_stride, b, b_stride, w, h, &sse, &sum);
  return sse;
}

// Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
// or vertical direction to produce the filtered output block. Used to
// implement the first pass of a 2-D separable filter.
//
// Produces uint16_t output to retain precision for the next pass. The two
// filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the
// filter is applied horizontally (pixel_step = 1) or vertically
// (pixel_step = stride). It defines the offset required to move from one
// input to the next.
void aom_var_filter_block2d_bil_first_pass_c(const uint8_t *a, uint16_t *b,
                                             unsigned int src_pixels_per_line,
                                             unsigned int pixel_step,
                                             unsigned int output_height,
                                             unsigned int output_width,
                                             const uint8_t *filter) {
  unsigned int i, j;

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      b[j] = ROUND_POWER_OF_TWO(
          (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);

      ++a;
    }

    // Switch to the next row of the source and the output.
    a += src_pixels_per_line - output_width;
    b += output_width;
  }
}

// Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
// or vertical direction to produce the filtered output block. Used to
// implement the second pass of a 2-D separable filter.
//
// Requires 16-bit input as produced by the first-pass function above. The two
// filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the
// filter is applied horizontally (pixel_step = 1) or vertically
// (pixel_step = stride). It defines the offset required to move from one
// input to the next. Output is 8-bit.
void aom_var_filter_block2d_bil_second_pass_c(const uint16_t *a, uint8_t *b,
                                              unsigned int src_pixels_per_line,
                                              unsigned int pixel_step,
                                              unsigned int output_height,
                                              unsigned int output_width,
                                              const uint8_t *filter) {
  unsigned int i, j;

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      b[j] = ROUND_POWER_OF_TWO(
          (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);

      ++a;
    }

    // Switch to the next row of the source and the output.
    a += src_pixels_per_line - output_width;
    b += output_width;
  }
}
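
// Example (illustrative only, not part of the library API): the two passes
// compose into a 2-D bilinear interpolation at a sub-pel offset, exactly as
// the SUBPIX_VAR macro below does for an 8x8 block at half-pel offset (4, 4):
//
//   uint16_t fdata3[(8 + 1) * 8];
//   uint8_t temp2[8 * 8];
//   aom_var_filter_block2d_bil_first_pass_c(src, fdata3, src_stride, 1,
//                                           8 + 1, 8, bilinear_filters_2t[4]);
//   aom_var_filter_block2d_bil_second_pass_c(fdata3, temp2, 8, 8, 8, 8,
//                                            bilinear_filters_2t[4]);
//
// The first pass filters horizontally (pixel_step = 1) and produces one extra
// row so that the second pass can filter vertically (pixel_step = 8, the
// intermediate stride).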

#define VAR(W, H)                                                    \
  uint32_t aom_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                     const uint8_t *b, int b_stride, \
                                     uint32_t *sse) {                \
    int sum;                                                         \
    variance(a, a_stride, b, b_stride, W, H, sse, &sum);             \
    return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H));        \
  }
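
// Note: the return expression is the usual identity
//   variance = SSE - sum^2 / N,  with N = W * H.
// The (int64_t) cast matters: for a 128x128 block |sum| can reach
// 255 * 16384 = 4177920, whose square overflows 32 bits.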

#define SUBPIX_VAR(W, H)                                                      \
  uint32_t aom_sub_pixel_variance##W##x##H##_c(                               \
      const uint8_t *a, int a_stride, int xoffset, int yoffset,               \
      const uint8_t *b, int b_stride, uint32_t *sse) {                        \
    uint16_t fdata3[(H + 1) * W];                                             \
    uint8_t temp2[H * W];                                                     \
                                                                              \
    aom_var_filter_block2d_bil_first_pass_c(a, fdata3, a_stride, 1, H + 1, W, \
                                            bilinear_filters_2t[xoffset]);    \
    aom_var_filter_block2d_bil_second_pass_c(fdata3, temp2, W, W, H, W,       \
                                             bilinear_filters_2t[yoffset]);   \
                                                                              \
    return aom_variance##W##x##H##_c(temp2, W, b, b_stride, sse);             \
  }

#define SUBPIX_AVG_VAR(W, H)                                                  \
  uint32_t aom_sub_pixel_avg_variance##W##x##H##_c(                           \
      const uint8_t *a, int a_stride, int xoffset, int yoffset,               \
      const uint8_t *b, int b_stride, uint32_t *sse,                          \
      const uint8_t *second_pred) {                                           \
    uint16_t fdata3[(H + 1) * W];                                             \
    uint8_t temp2[H * W];                                                     \
    DECLARE_ALIGNED(16, uint8_t, temp3[H * W]);                               \
                                                                              \
    aom_var_filter_block2d_bil_first_pass_c(a, fdata3, a_stride, 1, H + 1, W, \
                                            bilinear_filters_2t[xoffset]);    \
    aom_var_filter_block2d_bil_second_pass_c(fdata3, temp2, W, W, H, W,       \
                                             bilinear_filters_2t[yoffset]);   \
                                                                              \
    aom_comp_avg_pred(temp3, second_pred, W, H, temp2, W);                    \
                                                                              \
    return aom_variance##W##x##H##_c(temp3, W, b, b_stride, sse);             \
  }                                                                           \
  uint32_t aom_dist_wtd_sub_pixel_avg_variance##W##x##H##_c(                  \
      const uint8_t *a, int a_stride, int xoffset, int yoffset,               \
      const uint8_t *b, int b_stride, uint32_t *sse,                          \
      const uint8_t *second_pred, const DIST_WTD_COMP_PARAMS *jcp_param) {    \
    uint16_t fdata3[(H + 1) * W];                                             \
    uint8_t temp2[H * W];                                                     \
    DECLARE_ALIGNED(16, uint8_t, temp3[H * W]);                               \
                                                                              \
    aom_var_filter_block2d_bil_first_pass_c(a, fdata3, a_stride, 1, H + 1, W, \
                                            bilinear_filters_2t[xoffset]);    \
    aom_var_filter_block2d_bil_second_pass_c(fdata3, temp2, W, W, H, W,       \
                                             bilinear_filters_2t[yoffset]);   \
                                                                              \
    aom_dist_wtd_comp_avg_pred(temp3, second_pred, W, H, temp2, W,            \
                               jcp_param);                                    \
                                                                              \
    return aom_variance##W##x##H(temp3, W, b, b_stride, sse);                 \
  }

/* Identical to the variance call except it takes an additional parameter,
 * sum, and returns that value using pass-by-reference instead of returning
 * sse - sum^2 / w*h
 */
#define GET_VAR(W, H)                                                         \
  void aom_get##W##x##H##var_c(const uint8_t *a, int a_stride,                \
                               const uint8_t *b, int b_stride, uint32_t *sse, \
                               int *sum) {                                    \
    variance(a, a_stride, b, b_stride, W, H, sse, sum);                       \
  }

/* Identical to the variance call except it does not calculate the
 * sse - sum^2 / w*h and returns sse in addition to modifying the passed-in
 * variance.
 */
#define MSE(W, H)                                               \
  uint32_t aom_mse##W##x##H##_c(const uint8_t *a, int a_stride, \
                                const uint8_t *b, int b_stride, \
                                uint32_t *sse) {                \
    int sum;                                                    \
    variance(a, a_stride, b, b_stride, W, H, sse, &sum);        \
    return *sse;                                                \
  }
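
// Despite the name, aom_mse##W##x##H##_c returns the sum of squared errors
// without dividing by W * H; callers that need the mean perform the division
// themselves.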

/* All three forms of the variance are available in the same sizes. */
#define VARIANCES(W, H) \
  VAR(W, H)             \
  SUBPIX_VAR(W, H)      \
  SUBPIX_AVG_VAR(W, H)

VARIANCES(128, 128)
VARIANCES(128, 64)
VARIANCES(64, 128)
VARIANCES(64, 64)
VARIANCES(64, 32)
VARIANCES(32, 64)
VARIANCES(32, 32)
VARIANCES(32, 16)
VARIANCES(16, 32)
VARIANCES(16, 16)
VARIANCES(16, 8)
VARIANCES(8, 16)
VARIANCES(8, 8)
VARIANCES(8, 4)
VARIANCES(4, 8)
VARIANCES(4, 4)
VARIANCES(4, 2)
VARIANCES(2, 4)
VARIANCES(2, 2)

// Realtime mode doesn't use 4x rectangular blocks.
#if !CONFIG_REALTIME_ONLY
VARIANCES(4, 16)
VARIANCES(16, 4)
VARIANCES(8, 32)
VARIANCES(32, 8)
VARIANCES(16, 64)
VARIANCES(64, 16)
#endif

GET_VAR(16, 16)
GET_VAR(8, 8)

MSE(16, 16)
MSE(16, 8)
MSE(8, 16)
MSE(8, 8)

void aom_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
                         int height, const uint8_t *ref, int ref_stride) {
  int i, j;

  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      const int tmp = pred[j] + ref[j];
      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}
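
// ROUND_POWER_OF_TWO(tmp, 1) is the rounding average (pred + ref + 1) >> 1;
// e.g. pred = 3, ref = 4 gives (3 + 4 + 1) >> 1 = 4.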

void aom_dist_wtd_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred,
                                  int width, int height, const uint8_t *ref,
                                  int ref_stride,
                                  const DIST_WTD_COMP_PARAMS *jcp_param) {
  int i, j;
  const int fwd_offset = jcp_param->fwd_offset;
  const int bck_offset = jcp_param->bck_offset;

  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      int tmp = pred[j] * bck_offset + ref[j] * fwd_offset;
      tmp = ROUND_POWER_OF_TWO(tmp, DIST_PRECISION_BITS);
      comp_pred[j] = (uint8_t)tmp;
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}
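
// The two weights come from the codec's distance-weighted compound tables and
// sum to (1 << DIST_PRECISION_BITS), so the result is a weighted average of
// the two predictors rather than the plain (pred + ref + 1) >> 1 above.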

#if CONFIG_AV1_HIGHBITDEPTH
static void highbd_variance64(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride, int w, int h,
                              uint64_t *sse, int64_t *sum) {
  const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
  const uint16_t *b = CONVERT_TO_SHORTPTR(b8);
  int64_t tsum = 0;
  uint64_t tsse = 0;
  for (int i = 0; i < h; ++i) {
    int32_t lsum = 0;
    for (int j = 0; j < w; ++j) {
      const int diff = a[j] - b[j];
      lsum += diff;
      tsse += (uint32_t)(diff * diff);
    }
    tsum += lsum;
    a += a_stride;
    b += b_stride;
  }
  *sum = tsum;
  *sse = tsse;
}
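
// Note on the high-bitdepth convention: buffers hold uint16_t samples but are
// passed through uint8_t pointers. CONVERT_TO_SHORTPTR recovers the uint16_t
// view (and CONVERT_TO_BYTEPTR the reverse), which is why every highbd
// function below takes uint8_t pointers yet operates on 16-bit data.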

uint64_t aom_highbd_sse_odd_size(const uint8_t *a, int a_stride,
                                 const uint8_t *b, int b_stride, int w,
                                 int h) {
  uint64_t sse;
  int64_t sum;
  highbd_variance64(a, a_stride, b, b_stride, w, h, &sse, &sum);
  return sse;
}

static void highbd_8_variance(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride, int w, int h,
                              uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)sse_long;
  *sum = (int)sum_long;
}

static void highbd_10_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride, int w, int h,
                               uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4);
  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2);
}

static void highbd_12_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride, int w, int h,
                               uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 8);
  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4);
}
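
// The shifts normalize the statistics back to an 8-bit scale: at 10 bits the
// pixel differences are 4x larger, so sum is scaled down by 4 (>> 2) and sse
// by 16 (>> 4); at 12 bits the factors are 16 (>> 4) and 256 (>> 8).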

#define HIGHBD_VAR(W, H)                                                      \
  uint32_t aom_highbd_8_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                              const uint8_t *b, int b_stride, \
                                              uint32_t *sse) {                \
    int sum;                                                                  \
    highbd_8_variance(a, a_stride, b, b_stride, W, H, sse, &sum);             \
    return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H));                 \
  }                                                                           \
                                                                              \
  uint32_t aom_highbd_10_variance##W##x##H##_c(                               \
      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride,         \
      uint32_t *sse) {                                                        \
    int sum;                                                                  \
    int64_t var;                                                              \
    highbd_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum);            \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));                 \
    return (var >= 0) ? (uint32_t)var : 0;                                    \
  }                                                                           \
                                                                              \
  uint32_t aom_highbd_12_variance##W##x##H##_c(                               \
      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride,         \
      uint32_t *sse) {                                                        \
    int sum;                                                                  \
    int64_t var;                                                              \
    highbd_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum);            \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));                 \
    return (var >= 0) ? (uint32_t)var : 0;                                    \
  }

#define HIGHBD_GET_VAR(S)                                                    \
  void aom_highbd_8_get##S##x##S##var_c(const uint8_t *src, int src_stride,  \
                                        const uint8_t *ref, int ref_stride,  \
                                        uint32_t *sse, int *sum) {           \
    highbd_8_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);     \
  }                                                                          \
                                                                             \
  void aom_highbd_10_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
                                         const uint8_t *ref, int ref_stride, \
                                         uint32_t *sse, int *sum) {          \
    highbd_10_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);    \
  }                                                                          \
                                                                             \
  void aom_highbd_12_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
                                         const uint8_t *ref, int ref_stride, \
                                         uint32_t *sse, int *sum) {          \
    highbd_12_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);    \
  }

#define HIGHBD_MSE(W, H)                                                      \
  uint32_t aom_highbd_8_mse##W##x##H##_c(const uint8_t *src, int src_stride,  \
                                         const uint8_t *ref, int ref_stride,  \
                                         uint32_t *sse) {                     \
    int sum;                                                                  \
    highbd_8_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);     \
    return *sse;                                                              \
  }                                                                           \
                                                                              \
  uint32_t aom_highbd_10_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
                                          const uint8_t *ref, int ref_stride, \
                                          uint32_t *sse) {                    \
    int sum;                                                                  \
    highbd_10_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);    \
    return *sse;                                                              \
  }                                                                           \
                                                                              \
  uint32_t aom_highbd_12_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
                                          const uint8_t *ref, int ref_stride, \
                                          uint32_t *sse) {                    \
    int sum;                                                                  \
    highbd_12_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);    \
    return *sse;                                                              \
  }

void aom_highbd_var_filter_block2d_bil_first_pass(
    const uint8_t *src_ptr8, uint16_t *output_ptr,
    unsigned int src_pixels_per_line, int pixel_step,
    unsigned int output_height, unsigned int output_width,
    const uint8_t *filter) {
  unsigned int i, j;
  uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      output_ptr[j] = ROUND_POWER_OF_TWO(
          (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
          FILTER_BITS);

      ++src_ptr;
    }

    // Switch to the next row of the source and the output.
    src_ptr += src_pixels_per_line - output_width;
    output_ptr += output_width;
  }
}

void aom_highbd_var_filter_block2d_bil_second_pass(
    const uint16_t *src_ptr, uint16_t *output_ptr,
    unsigned int src_pixels_per_line, unsigned int pixel_step,
    unsigned int output_height, unsigned int output_width,
    const uint8_t *filter) {
  unsigned int i, j;

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      output_ptr[j] = ROUND_POWER_OF_TWO(
          (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
          FILTER_BITS);

      ++src_ptr;
    }

    src_ptr += src_pixels_per_line - output_width;
    output_ptr += output_width;
  }
}

#define HIGHBD_SUBPIX_VAR(W, H)                                              \
  uint32_t aom_highbd_8_sub_pixel_variance##W##x##H##_c(                     \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                   \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    return aom_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W,  \
                                              dst, dst_stride, sse);         \
  }                                                                          \
                                                                             \
  uint32_t aom_highbd_10_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                   \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    return aom_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
                                               dst, dst_stride, sse);        \
  }                                                                          \
                                                                             \
  uint32_t aom_highbd_12_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                   \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    return aom_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
                                               dst, dst_stride, sse);        \
  }

#define HIGHBD_SUBPIX_AVG_VAR(W, H)                                          \
  uint32_t aom_highbd_8_sub_pixel_avg_variance##W##x##H##_c(                 \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                     \
      const uint8_t *second_pred) {                                          \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    aom_highbd_comp_avg_pred_c(CONVERT_TO_BYTEPTR(temp3), second_pred, W, H, \
                               CONVERT_TO_BYTEPTR(temp2), W);                \
                                                                             \
    return aom_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W,  \
                                              dst, dst_stride, sse);         \
  }                                                                          \
                                                                             \
  uint32_t aom_highbd_10_sub_pixel_avg_variance##W##x##H##_c(                \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                     \
      const uint8_t *second_pred) {                                          \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    aom_highbd_comp_avg_pred_c(CONVERT_TO_BYTEPTR(temp3), second_pred, W, H, \
                               CONVERT_TO_BYTEPTR(temp2), W);                \
                                                                             \
    return aom_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
                                               dst, dst_stride, sse);        \
  }                                                                          \
                                                                             \
  uint32_t aom_highbd_12_sub_pixel_avg_variance##W##x##H##_c(                \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                     \
      const uint8_t *second_pred) {                                          \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    aom_highbd_comp_avg_pred_c(CONVERT_TO_BYTEPTR(temp3), second_pred, W, H, \
                               CONVERT_TO_BYTEPTR(temp2), W);                \
                                                                             \
    return aom_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
                                               dst, dst_stride, sse);        \
  }                                                                          \
                                                                             \
  uint32_t aom_highbd_8_dist_wtd_sub_pixel_avg_variance##W##x##H##_c(          \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,            \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                       \
      const uint8_t *second_pred, const DIST_WTD_COMP_PARAMS *jcp_param) {     \
    uint16_t fdata3[(H + 1) * W];                                              \
    uint16_t temp2[H * W];                                                     \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                               \
                                                                               \
    aom_highbd_var_filter_block2d_bil_first_pass(                              \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                             \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
                                                                               \
    aom_highbd_dist_wtd_comp_avg_pred(CONVERT_TO_BYTEPTR(temp3), second_pred,  \
                                      W, H, CONVERT_TO_BYTEPTR(temp2), W,      \
                                      jcp_param);                              \
                                                                               \
    return aom_highbd_8_variance##W##x##H(CONVERT_TO_BYTEPTR(temp3), W, dst,   \
                                          dst_stride, sse);                    \
  }                                                                            \
                                                                               \
  uint32_t aom_highbd_10_dist_wtd_sub_pixel_avg_variance##W##x##H##_c(         \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,            \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                       \
      const uint8_t *second_pred, const DIST_WTD_COMP_PARAMS *jcp_param) {     \
    uint16_t fdata3[(H + 1) * W];                                              \
    uint16_t temp2[H * W];                                                     \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                               \
                                                                               \
    aom_highbd_var_filter_block2d_bil_first_pass(                              \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                             \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
                                                                               \
    aom_highbd_dist_wtd_comp_avg_pred(CONVERT_TO_BYTEPTR(temp3), second_pred,  \
                                      W, H, CONVERT_TO_BYTEPTR(temp2), W,      \
                                      jcp_param);                              \
                                                                               \
    return aom_highbd_10_variance##W##x##H(CONVERT_TO_BYTEPTR(temp3), W, dst,  \
                                           dst_stride, sse);                   \
  }                                                                            \
                                                                               \
  uint32_t aom_highbd_12_dist_wtd_sub_pixel_avg_variance##W##x##H##_c(         \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,            \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                       \
      const uint8_t *second_pred, const DIST_WTD_COMP_PARAMS *jcp_param) {     \
    uint16_t fdata3[(H + 1) * W];                                              \
    uint16_t temp2[H * W];                                                     \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                               \
                                                                               \
    aom_highbd_var_filter_block2d_bil_first_pass(                              \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                             \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
                                                                               \
    aom_highbd_dist_wtd_comp_avg_pred(CONVERT_TO_BYTEPTR(temp3), second_pred,  \
                                      W, H, CONVERT_TO_BYTEPTR(temp2), W,      \
                                      jcp_param);                              \
                                                                               \
    return aom_highbd_12_variance##W##x##H(CONVERT_TO_BYTEPTR(temp3), W, dst,  \
                                           dst_stride, sse);                   \
  }

/* All three forms of the variance are available in the same sizes. */
#define HIGHBD_VARIANCES(W, H) \
  HIGHBD_VAR(W, H)             \
  HIGHBD_SUBPIX_VAR(W, H)      \
  HIGHBD_SUBPIX_AVG_VAR(W, H)

HIGHBD_VARIANCES(128, 128)
HIGHBD_VARIANCES(128, 64)
HIGHBD_VARIANCES(64, 128)
HIGHBD_VARIANCES(64, 64)
HIGHBD_VARIANCES(64, 32)
HIGHBD_VARIANCES(32, 64)
HIGHBD_VARIANCES(32, 32)
HIGHBD_VARIANCES(32, 16)
HIGHBD_VARIANCES(16, 32)
HIGHBD_VARIANCES(16, 16)
HIGHBD_VARIANCES(16, 8)
HIGHBD_VARIANCES(8, 16)
HIGHBD_VARIANCES(8, 8)
HIGHBD_VARIANCES(8, 4)
HIGHBD_VARIANCES(4, 8)
HIGHBD_VARIANCES(4, 4)
HIGHBD_VARIANCES(4, 2)
HIGHBD_VARIANCES(2, 4)
HIGHBD_VARIANCES(2, 2)

// Realtime mode doesn't use 4x rectangular blocks.
#if !CONFIG_REALTIME_ONLY
HIGHBD_VARIANCES(4, 16)
HIGHBD_VARIANCES(16, 4)
HIGHBD_VARIANCES(8, 32)
HIGHBD_VARIANCES(32, 8)
HIGHBD_VARIANCES(16, 64)
HIGHBD_VARIANCES(64, 16)
#endif

HIGHBD_GET_VAR(16)
HIGHBD_GET_VAR(8)

HIGHBD_MSE(16, 16)
HIGHBD_MSE(16, 8)
HIGHBD_MSE(8, 16)
HIGHBD_MSE(8, 8)

void aom_highbd_comp_avg_pred_c(uint8_t *comp_pred8, const uint8_t *pred8,
                                int width, int height, const uint8_t *ref8,
                                int ref_stride) {
  int i, j;
  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  uint16_t *comp_pred = CONVERT_TO_SHORTPTR(comp_pred8);
  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      const int tmp = pred[j] + ref[j];
      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}

void aom_highbd_dist_wtd_comp_avg_pred_c(
    uint8_t *comp_pred8, const uint8_t *pred8, int width, int height,
    const uint8_t *ref8, int ref_stride,
    const DIST_WTD_COMP_PARAMS *jcp_param) {
  int i, j;
  const int fwd_offset = jcp_param->fwd_offset;
  const int bck_offset = jcp_param->bck_offset;
  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  uint16_t *comp_pred = CONVERT_TO_SHORTPTR(comp_pred8);

  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      int tmp = pred[j] * bck_offset + ref[j] * fwd_offset;
      tmp = ROUND_POWER_OF_TWO(tmp, DIST_PRECISION_BITS);
      comp_pred[j] = (uint16_t)tmp;
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}
#endif  // CONFIG_AV1_HIGHBITDEPTH

void aom_comp_mask_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
                          int height, const uint8_t *ref, int ref_stride,
                          const uint8_t *mask, int mask_stride,
                          int invert_mask) {
  int i, j;
  const uint8_t *src0 = invert_mask ? pred : ref;
  const uint8_t *src1 = invert_mask ? ref : pred;
  const int stride0 = invert_mask ? width : ref_stride;
  const int stride1 = invert_mask ? ref_stride : width;
  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      comp_pred[j] = AOM_BLEND_A64(mask[j], src0[j], src1[j]);
    }
    comp_pred += width;
    src0 += stride0;
    src1 += stride1;
    mask += mask_stride;
  }
}
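
// AOM_BLEND_A64(m, v0, v1) computes the 6-bit alpha blend
//   (m * v0 + (64 - m) * v1 + 32) >> 6,
// so mask[j] == 64 selects src0 entirely and mask[j] == 0 selects src1.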

#define MASK_SUBPIX_VAR(W, H)                                                  \
  unsigned int aom_masked_sub_pixel_variance##W##x##H##_c(                     \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,            \
      const uint8_t *ref, int ref_stride, const uint8_t *second_pred,          \
      const uint8_t *msk, int msk_stride, int invert_mask,                     \
      unsigned int *sse) {                                                     \
    uint16_t fdata3[(H + 1) * W];                                              \
    uint8_t temp2[H * W];                                                      \
    DECLARE_ALIGNED(16, uint8_t, temp3[H * W]);                                \
                                                                               \
    aom_var_filter_block2d_bil_first_pass_c(src, fdata3, src_stride, 1, H + 1, \
                                            W, bilinear_filters_2t[xoffset]);  \
    aom_var_filter_block2d_bil_second_pass_c(fdata3, temp2, W, W, H, W,        \
                                             bilinear_filters_2t[yoffset]);    \
                                                                               \
    aom_comp_mask_pred_c(temp3, second_pred, W, H, temp2, W, msk, msk_stride,  \
                         invert_mask);                                         \
                                                                               \
    return aom_variance##W##x##H##_c(temp3, W, ref, ref_stride, sse);          \
  }

MASK_SUBPIX_VAR(4, 4)
MASK_SUBPIX_VAR(4, 8)
MASK_SUBPIX_VAR(8, 4)
MASK_SUBPIX_VAR(8, 8)
MASK_SUBPIX_VAR(8, 16)
MASK_SUBPIX_VAR(16, 8)
MASK_SUBPIX_VAR(16, 16)
MASK_SUBPIX_VAR(16, 32)
MASK_SUBPIX_VAR(32, 16)
MASK_SUBPIX_VAR(32, 32)
MASK_SUBPIX_VAR(32, 64)
MASK_SUBPIX_VAR(64, 32)
MASK_SUBPIX_VAR(64, 64)
MASK_SUBPIX_VAR(64, 128)
MASK_SUBPIX_VAR(128, 64)
MASK_SUBPIX_VAR(128, 128)

// Realtime mode doesn't use 4x rectangular blocks.
#if !CONFIG_REALTIME_ONLY
MASK_SUBPIX_VAR(4, 16)
MASK_SUBPIX_VAR(16, 4)
MASK_SUBPIX_VAR(8, 32)
MASK_SUBPIX_VAR(32, 8)
MASK_SUBPIX_VAR(16, 64)
MASK_SUBPIX_VAR(64, 16)
#endif

#if CONFIG_AV1_HIGHBITDEPTH
void aom_highbd_comp_mask_pred_c(uint8_t *comp_pred8, const uint8_t *pred8,
                                 int width, int height, const uint8_t *ref8,
                                 int ref_stride, const uint8_t *mask,
                                 int mask_stride, int invert_mask) {
  int i, j;
  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  uint16_t *comp_pred = CONVERT_TO_SHORTPTR(comp_pred8);
  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      if (!invert_mask)
        comp_pred[j] = AOM_BLEND_A64(mask[j], ref[j], pred[j]);
      else
        comp_pred[j] = AOM_BLEND_A64(mask[j], pred[j], ref[j]);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
    mask += mask_stride;
  }
}

#define HIGHBD_MASK_SUBPIX_VAR(W, H)                                           \
  unsigned int aom_highbd_8_masked_sub_pixel_variance##W##x##H##_c(            \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,            \
      const uint8_t *ref, int ref_stride, const uint8_t *second_pred,          \
      const uint8_t *msk, int msk_stride, int invert_mask,                     \
      unsigned int *sse) {                                                     \
    uint16_t fdata3[(H + 1) * W];                                              \
    uint16_t temp2[H * W];                                                     \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                               \
                                                                               \
    aom_highbd_var_filter_block2d_bil_first_pass(                              \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                             \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
                                                                               \
    aom_highbd_comp_mask_pred_c(CONVERT_TO_BYTEPTR(temp3), second_pred, W, H,  \
                                CONVERT_TO_BYTEPTR(temp2), W, msk, msk_stride, \
                                invert_mask);                                  \
                                                                               \
    return aom_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W,    \
                                              ref, ref_stride, sse);           \
  }                                                                            \
                                                                               \
  unsigned int aom_highbd_10_masked_sub_pixel_variance##W##x##H##_c(           \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,            \
      const uint8_t *ref, int ref_stride, const uint8_t *second_pred,          \
      const uint8_t *msk, int msk_stride, int invert_mask,                     \
      unsigned int *sse) {                                                     \
    uint16_t fdata3[(H + 1) * W];                                              \
    uint16_t temp2[H * W];                                                     \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                               \
                                                                               \
    aom_highbd_var_filter_block2d_bil_first_pass(                              \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                             \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
                                                                               \
    aom_highbd_comp_mask_pred_c(CONVERT_TO_BYTEPTR(temp3), second_pred, W, H,  \
                                CONVERT_TO_BYTEPTR(temp2), W, msk, msk_stride, \
                                invert_mask);                                  \
                                                                               \
    return aom_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W,   \
                                               ref, ref_stride, sse);          \
  }                                                                            \
                                                                               \
  unsigned int aom_highbd_12_masked_sub_pixel_variance##W##x##H##_c(           \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,            \
      const uint8_t *ref, int ref_stride, const uint8_t *second_pred,          \
      const uint8_t *msk, int msk_stride, int invert_mask,                     \
      unsigned int *sse) {                                                     \
    uint16_t fdata3[(H + 1) * W];                                              \
    uint16_t temp2[H * W];                                                     \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                               \
                                                                               \
    aom_highbd_var_filter_block2d_bil_first_pass(                              \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                             \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
                                                                               \
    aom_highbd_comp_mask_pred_c(CONVERT_TO_BYTEPTR(temp3), second_pred, W, H,  \
                                CONVERT_TO_BYTEPTR(temp2), W, msk, msk_stride, \
                                invert_mask);                                  \
                                                                               \
    return aom_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W,   \
                                               ref, ref_stride, sse);          \
  }

HIGHBD_MASK_SUBPIX_VAR(4, 4)
HIGHBD_MASK_SUBPIX_VAR(4, 8)
HIGHBD_MASK_SUBPIX_VAR(8, 4)
HIGHBD_MASK_SUBPIX_VAR(8, 8)
HIGHBD_MASK_SUBPIX_VAR(8, 16)
HIGHBD_MASK_SUBPIX_VAR(16, 8)
HIGHBD_MASK_SUBPIX_VAR(16, 16)
HIGHBD_MASK_SUBPIX_VAR(16, 32)
HIGHBD_MASK_SUBPIX_VAR(32, 16)
HIGHBD_MASK_SUBPIX_VAR(32, 32)
HIGHBD_MASK_SUBPIX_VAR(32, 64)
HIGHBD_MASK_SUBPIX_VAR(64, 32)
HIGHBD_MASK_SUBPIX_VAR(64, 64)
HIGHBD_MASK_SUBPIX_VAR(64, 128)
HIGHBD_MASK_SUBPIX_VAR(128, 64)
HIGHBD_MASK_SUBPIX_VAR(128, 128)

#if !CONFIG_REALTIME_ONLY
HIGHBD_MASK_SUBPIX_VAR(4, 16)
HIGHBD_MASK_SUBPIX_VAR(16, 4)
HIGHBD_MASK_SUBPIX_VAR(8, 32)
HIGHBD_MASK_SUBPIX_VAR(32, 8)
HIGHBD_MASK_SUBPIX_VAR(16, 64)
HIGHBD_MASK_SUBPIX_VAR(64, 16)
#endif
#endif  // CONFIG_AV1_HIGHBITDEPTH

#if !CONFIG_REALTIME_ONLY
static INLINE void obmc_variance(const uint8_t *pre, int pre_stride,
                                 const int32_t *wsrc, const int32_t *mask,
                                 int w, int h, unsigned int *sse, int *sum) {
  int i, j;

  *sse = 0;
  *sum = 0;

  for (i = 0; i < h; i++) {
    for (j = 0; j < w; j++) {
      int diff = ROUND_POWER_OF_TWO_SIGNED(wsrc[j] - pre[j] * mask[j], 12);
      *sum += diff;
      *sse += diff * diff;
    }

    pre += pre_stride;
    wsrc += w;
    mask += w;
  }
}
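
// For OBMC, wsrc holds the overlapped source pre-multiplied by 12-bit blend
// weights and mask holds the matching 12-bit weights for the prediction, so
// the signed rounding shift by 12 brings the weighted difference back to
// pixel scale before the variance accumulation.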

#define OBMC_VAR(W, H)                                            \
  unsigned int aom_obmc_variance##W##x##H##_c(                    \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,    \
      const int32_t *mask, unsigned int *sse) {                   \
    int sum;                                                      \
    obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum);  \
    return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H)); \
  }

#define OBMC_SUBPIX_VAR(W, H)                                                  \
  unsigned int aom_obmc_sub_pixel_variance##W##x##H##_c(                       \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,            \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {           \
    uint16_t fdata3[(H + 1) * W];                                              \
    uint8_t temp2[H * W];                                                      \
                                                                               \
    aom_var_filter_block2d_bil_first_pass_c(pre, fdata3, pre_stride, 1, H + 1, \
                                            W, bilinear_filters_2t[xoffset]);  \
    aom_var_filter_block2d_bil_second_pass_c(fdata3, temp2, W, W, H, W,        \
                                             bilinear_filters_2t[yoffset]);    \
                                                                               \
    return aom_obmc_variance##W##x##H##_c(temp2, W, wsrc, mask, sse);          \
  }

OBMC_VAR(4, 4)
OBMC_SUBPIX_VAR(4, 4)

OBMC_VAR(4, 8)
OBMC_SUBPIX_VAR(4, 8)

OBMC_VAR(8, 4)
OBMC_SUBPIX_VAR(8, 4)

OBMC_VAR(8, 8)
OBMC_SUBPIX_VAR(8, 8)

OBMC_VAR(8, 16)
OBMC_SUBPIX_VAR(8, 16)

OBMC_VAR(16, 8)
OBMC_SUBPIX_VAR(16, 8)

OBMC_VAR(16, 16)
OBMC_SUBPIX_VAR(16, 16)

OBMC_VAR(16, 32)
OBMC_SUBPIX_VAR(16, 32)

OBMC_VAR(32, 16)
OBMC_SUBPIX_VAR(32, 16)

OBMC_VAR(32, 32)
OBMC_SUBPIX_VAR(32, 32)

OBMC_VAR(32, 64)
OBMC_SUBPIX_VAR(32, 64)

OBMC_VAR(64, 32)
OBMC_SUBPIX_VAR(64, 32)

OBMC_VAR(64, 64)
OBMC_SUBPIX_VAR(64, 64)

OBMC_VAR(64, 128)
OBMC_SUBPIX_VAR(64, 128)

OBMC_VAR(128, 64)
OBMC_SUBPIX_VAR(128, 64)

OBMC_VAR(128, 128)
OBMC_SUBPIX_VAR(128, 128)

OBMC_VAR(4, 16)
OBMC_SUBPIX_VAR(4, 16)
OBMC_VAR(16, 4)
OBMC_SUBPIX_VAR(16, 4)
OBMC_VAR(8, 32)
OBMC_SUBPIX_VAR(8, 32)
OBMC_VAR(32, 8)
OBMC_SUBPIX_VAR(32, 8)
OBMC_VAR(16, 64)
OBMC_SUBPIX_VAR(16, 64)
OBMC_VAR(64, 16)
OBMC_SUBPIX_VAR(64, 16)

#if CONFIG_AV1_HIGHBITDEPTH
static INLINE void highbd_obmc_variance64(const uint8_t *pre8, int pre_stride,
                                          const int32_t *wsrc,
                                          const int32_t *mask, int w, int h,
                                          uint64_t *sse, int64_t *sum) {
  int i, j;
  uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);

  *sse = 0;
  *sum = 0;

  for (i = 0; i < h; i++) {
    for (j = 0; j < w; j++) {
      int diff = ROUND_POWER_OF_TWO_SIGNED(wsrc[j] - pre[j] * mask[j], 12);
      *sum += diff;
      *sse += diff * diff;
    }

    pre += pre_stride;
    wsrc += w;
    mask += w;
  }
}

static INLINE void highbd_obmc_variance(const uint8_t *pre8, int pre_stride,
                                        const int32_t *wsrc,
                                        const int32_t *mask, int w, int h,
                                        unsigned int *sse, int *sum) {
  int64_t sum64;
  uint64_t sse64;
  highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &sse64, &sum64);
  *sum = (int)sum64;
  *sse = (unsigned int)sse64;
}

static INLINE void highbd_10_obmc_variance(const uint8_t *pre8, int pre_stride,
                                           const int32_t *wsrc,
                                           const int32_t *mask, int w, int h,
                                           unsigned int *sse, int *sum) {
  int64_t sum64;
  uint64_t sse64;
  highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &sse64, &sum64);
  *sum = (int)ROUND_POWER_OF_TWO(sum64, 2);
  *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 4);
}

static INLINE void highbd_12_obmc_variance(const uint8_t *pre8, int pre_stride,
                                           const int32_t *wsrc,
                                           const int32_t *mask, int w, int h,
                                           unsigned int *sse, int *sum) {
  int64_t sum64;
  uint64_t sse64;
  highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &sse64, &sum64);
  *sum = (int)ROUND_POWER_OF_TWO(sum64, 4);
  *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 8);
}

#define HIGHBD_OBMC_VAR(W, H)                                              \
  unsigned int aom_highbd_obmc_variance##W##x##H##_c(                      \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,             \
      const int32_t *mask, unsigned int *sse) {                            \
    int sum;                                                               \
    highbd_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum);    \
    return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H));          \
  }                                                                        \
                                                                           \
  unsigned int aom_highbd_10_obmc_variance##W##x##H##_c(                   \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,             \
      const int32_t *mask, unsigned int *sse) {                            \
    int sum;                                                               \
    int64_t var;                                                           \
    highbd_10_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum); \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));              \
    return (var >= 0) ? (uint32_t)var : 0;                                 \
  }                                                                        \
                                                                           \
  unsigned int aom_highbd_12_obmc_variance##W##x##H##_c(                   \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,             \
      const int32_t *mask, unsigned int *sse) {                            \
    int sum;                                                               \
    int64_t var;                                                           \
    highbd_12_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum); \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));              \
    return (var >= 0) ? (uint32_t)var : 0;                                 \
  }

#define HIGHBD_OBMC_SUBPIX_VAR(W, H)                                           \
  unsigned int aom_highbd_obmc_sub_pixel_variance##W##x##H##_c(                \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,            \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {           \
    uint16_t fdata3[(H + 1) * W];                                              \
    uint16_t temp2[H * W];                                                     \
                                                                               \
    aom_highbd_var_filter_block2d_bil_first_pass(                              \
        pre, fdata3, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                             \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
                                                                               \
    return aom_highbd_obmc_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
                                                 wsrc, mask, sse);             \
  }                                                                            \
                                                                               \
  unsigned int aom_highbd_10_obmc_sub_pixel_variance##W##x##H##_c(             \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,            \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {           \
    uint16_t fdata3[(H + 1) * W];                                              \
    uint16_t temp2[H * W];                                                     \
                                                                               \
    aom_highbd_var_filter_block2d_bil_first_pass(                              \
        pre, fdata3, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                             \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
                                                                               \
    return aom_highbd_10_obmc_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
                                                    W, wsrc, mask, sse);       \
  }                                                                            \
                                                                               \
  unsigned int aom_highbd_12_obmc_sub_pixel_variance##W##x##H##_c(             \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,            \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {           \
    uint16_t fdata3[(H + 1) * W];                                              \
    uint16_t temp2[H * W];                                                     \
                                                                               \
    aom_highbd_var_filter_block2d_bil_first_pass(                              \
        pre, fdata3, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                             \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
                                                                               \
    return aom_highbd_12_obmc_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
                                                    W, wsrc, mask, sse);       \
  }

HIGHBD_OBMC_VAR(4, 4)
HIGHBD_OBMC_SUBPIX_VAR(4, 4)

HIGHBD_OBMC_VAR(4, 8)
HIGHBD_OBMC_SUBPIX_VAR(4, 8)

HIGHBD_OBMC_VAR(8, 4)
HIGHBD_OBMC_SUBPIX_VAR(8, 4)

HIGHBD_OBMC_VAR(8, 8)
HIGHBD_OBMC_SUBPIX_VAR(8, 8)

HIGHBD_OBMC_VAR(8, 16)
HIGHBD_OBMC_SUBPIX_VAR(8, 16)

HIGHBD_OBMC_VAR(16, 8)
HIGHBD_OBMC_SUBPIX_VAR(16, 8)

HIGHBD_OBMC_VAR(16, 16)
HIGHBD_OBMC_SUBPIX_VAR(16, 16)

HIGHBD_OBMC_VAR(16, 32)
HIGHBD_OBMC_SUBPIX_VAR(16, 32)

HIGHBD_OBMC_VAR(32, 16)
HIGHBD_OBMC_SUBPIX_VAR(32, 16)

HIGHBD_OBMC_VAR(32, 32)
HIGHBD_OBMC_SUBPIX_VAR(32, 32)

HIGHBD_OBMC_VAR(32, 64)
HIGHBD_OBMC_SUBPIX_VAR(32, 64)

HIGHBD_OBMC_VAR(64, 32)
HIGHBD_OBMC_SUBPIX_VAR(64, 32)

HIGHBD_OBMC_VAR(64, 64)
HIGHBD_OBMC_SUBPIX_VAR(64, 64)

HIGHBD_OBMC_VAR(64, 128)
HIGHBD_OBMC_SUBPIX_VAR(64, 128)

HIGHBD_OBMC_VAR(128, 64)
HIGHBD_OBMC_SUBPIX_VAR(128, 64)

HIGHBD_OBMC_VAR(128, 128)
HIGHBD_OBMC_SUBPIX_VAR(128, 128)

HIGHBD_OBMC_VAR(4, 16)
HIGHBD_OBMC_SUBPIX_VAR(4, 16)
HIGHBD_OBMC_VAR(16, 4)
HIGHBD_OBMC_SUBPIX_VAR(16, 4)
HIGHBD_OBMC_VAR(8, 32)
HIGHBD_OBMC_SUBPIX_VAR(8, 32)
HIGHBD_OBMC_VAR(32, 8)
HIGHBD_OBMC_SUBPIX_VAR(32, 8)
HIGHBD_OBMC_VAR(16, 64)
HIGHBD_OBMC_SUBPIX_VAR(16, 64)
HIGHBD_OBMC_VAR(64, 16)
HIGHBD_OBMC_SUBPIX_VAR(64, 16)
#endif  // CONFIG_AV1_HIGHBITDEPTH
#endif  // !CONFIG_REALTIME_ONLY

uint64_t aom_mse_wxh_16bit_c(uint8_t *dst, int dstride, uint16_t *src,
                             int sstride, int w, int h) {
  uint64_t sum = 0;
  for (int i = 0; i < h; i++) {
    for (int j = 0; j < w; j++) {
      int e = (uint16_t)dst[i * dstride + j] - src[i * sstride + j];
      sum += e * e;
    }
  }
  return sum;
}
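
// As with aom_mse##W##x##H##_c above, the "mse" here is really the sum of
// squared errors over the w x h block; no division by w * h is performed.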

uint64_t aom_mse_wxh_16bit_highbd_c(uint16_t *dst, int dstride, uint16_t *src,
                                    int sstride, int w, int h) {
  uint64_t sum = 0;
  for (int i = 0; i < h; i++) {
    for (int j = 0; j < w; j++) {
      int e = dst[i * dstride + j] - src[i * sstride + j];
      sum += e * e;
    }
  }
  return sum;
}