Add ssse3 aom_smooth_h_predictor_16,32,64xh
[aom.git] / aom_dsp / aom_dsp_rtcd_defs.pl
blob75679f5ebe047ceb82035f1186fdf41c51e01a6f
1 ##
2 ## Copyright (c) 2017, Alliance for Open Media. All rights reserved
3 ##
4 ## This source code is subject to the terms of the BSD 2 Clause License and
5 ## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 ## was not distributed with this source code in the LICENSE file, you can
7 ## obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 ## Media Patent License 1.0 was not distributed with this source code in the
9 ## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
# Prints a here-document containing the #include lines that the prototypes
# declared later in this file rely on. The sub's name is then handed to
# forward_decls, which is defined outside this file.
# NOTE(review): presumably forward_decls makes the generator emit this text
# ahead of the generated declarations -- confirm against the generator script.
11 sub aom_dsp_forward_decls() {
12 print <<EOF
14 * DSP
17 #include "aom/aom_integer.h"
18 #include "aom_dsp/aom_dsp_common.h"
19 #include "av1/common/enums.h"
20 #include "av1/common/blockd.h"
22 EOF
24 forward_decls qw/aom_dsp_forward_decls/;
26 # optimizations which depend on multiple features
# $avx2_ssse3 becomes 'avx2' only when both HAVE_AVX2 and HAVE_SSSE3 are
# "yes"; otherwise it stays the empty string.
27 $avx2_ssse3 = '';
28 if ((aom_config("HAVE_AVX2") eq "yes") && (aom_config("HAVE_SSSE3") eq "yes")) {
29 $avx2_ssse3 = 'avx2';
32 # functions that are 64 bit only.
# Each *_x86_64 variable holds its optimization name only when $opts{arch} is
# "x86_64"; on any other arch it is the empty string.
33 $mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = $avx_x86_64 = $avx2_x86_64 = '';
34 if ($opts{arch} eq "x86_64") {
35 $mmx_x86_64 = 'mmx';
36 $sse2_x86_64 = 'sse2';
37 $ssse3_x86_64 = 'ssse3';
38 $avx_x86_64 = 'avx';
39 $avx2_x86_64 = 'avx2';
# Builds @block_sizes as [width, height] pairs: every combination of
# @block_widths in which neither side is more than twice the other (1:1, 1:2
# and 2:1 shapes), followed by six explicitly appended 1:4 / 4:1 shapes.
42 @block_widths = (4, 8, 16, 32, 64, 128);
44 @block_sizes = ();
45 foreach $w (@block_widths) {
46 foreach $h (@block_widths) {
47 push @block_sizes, [$w, $h] if ($w <= 2*$h && $h <= 2*$w) ;
50 push @block_sizes, [4, 16];
51 push @block_sizes, [16, 4];
52 push @block_sizes, [8, 32];
53 push @block_sizes, [32, 8];
54 push @block_sizes, [16, 64];
55 push @block_sizes, [64, 16];
# Builds @tx_sizes as [width, height] pairs: one square entry per value in
# @tx_dims (2x2 included), plus the 2:1/1:2 and 4:1/1:4 rectangles whose sides
# are both at least 4. @pred_names holds the predictor name fragments that are
# interpolated into the intra predictor function names.
57 @tx_dims = (2, 4, 8, 16, 32, 64);
58 @tx_sizes = ();
59 foreach $w (@tx_dims) {
60 push @tx_sizes, [$w, $w];
61 foreach $h (@tx_dims) {
62 push @tx_sizes, [$w, $h] if ($w >=4 && $h >=4 && ($w == 2*$h || $h == 2*$w));
63 push @tx_sizes, [$w, $h] if ($w >=4 && $h >=4 && ($w == 4*$h || $h == 4*$w));
67 @pred_names = qw/dc dc_top dc_left dc_128 v h paeth smooth smooth_v smooth_h/;
70 # Intra prediction
# For every [w, h] in @tx_sizes and every name in @pred_names, two prototypes
# are declared: aom_<pred>_predictor_<w>x<h> operating on uint8_t samples and
# aom_highbd_<pred>_predictor_<w>x<h> operating on uint16_t samples with an
# extra "int bd" argument.
73 foreach (@tx_sizes) {
74 ($w, $h) = @$_;
75 foreach $pred_name (@pred_names) {
76 add_proto "void", "aom_${pred_name}_predictor_${w}x${h}",
77 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
78 add_proto "void", "aom_highbd_${pred_name}_predictor_${w}x${h}",
79 "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
# dc_top predictor: each specialize call gives the function name followed by
# the optimization names listed for it. Only the square 4x4..32x32 sizes list
# neon/msa, and avx2 appears only for widths 32 and 64. The 4x16, 16x4, 8x32
# and 32x8 sizes are not listed.
83 specialize qw/aom_dc_top_predictor_4x4 msa neon sse2/;
84 specialize qw/aom_dc_top_predictor_4x8 sse2/;
85 specialize qw/aom_dc_top_predictor_8x4 sse2/;
86 specialize qw/aom_dc_top_predictor_8x8 neon msa sse2/;
87 specialize qw/aom_dc_top_predictor_8x16 sse2/;
88 specialize qw/aom_dc_top_predictor_16x8 sse2/;
89 specialize qw/aom_dc_top_predictor_16x16 neon msa sse2/;
90 specialize qw/aom_dc_top_predictor_16x32 sse2/;
91 specialize qw/aom_dc_top_predictor_16x64 sse2/;
92 specialize qw/aom_dc_top_predictor_32x16 sse2 avx2/;
93 specialize qw/aom_dc_top_predictor_32x32 msa neon sse2 avx2/;
94 specialize qw/aom_dc_top_predictor_32x64 sse2 avx2/;
95 specialize qw/aom_dc_top_predictor_64x64 sse2 avx2/;
96 specialize qw/aom_dc_top_predictor_64x32 sse2 avx2/;
97 specialize qw/aom_dc_top_predictor_64x16 sse2 avx2/;
# dc_left predictor: same layout as the dc_top list -- sse2 for every listed
# size, neon/msa only on the square 4x4..32x32 sizes, avx2 only for widths 32
# and 64.
98 specialize qw/aom_dc_left_predictor_4x4 msa neon sse2/;
99 specialize qw/aom_dc_left_predictor_4x8 sse2/;
100 specialize qw/aom_dc_left_predictor_8x4 sse2/;
101 specialize qw/aom_dc_left_predictor_8x8 neon msa sse2/;
102 specialize qw/aom_dc_left_predictor_8x16 sse2/;
103 specialize qw/aom_dc_left_predictor_16x8 sse2/;
104 specialize qw/aom_dc_left_predictor_16x16 neon msa sse2/;
105 specialize qw/aom_dc_left_predictor_16x32 sse2/;
106 specialize qw/aom_dc_left_predictor_16x64 sse2/;
107 specialize qw/aom_dc_left_predictor_32x16 sse2 avx2/;
108 specialize qw/aom_dc_left_predictor_32x32 msa neon sse2 avx2/;
109 specialize qw/aom_dc_left_predictor_32x64 sse2 avx2/;
110 specialize qw/aom_dc_left_predictor_64x64 sse2 avx2/;
111 specialize qw/aom_dc_left_predictor_64x32 sse2 avx2/;
112 specialize qw/aom_dc_left_predictor_64x16 sse2 avx2/;
# dc_128 predictor: sse2 for every listed size, neon/msa only on the square
# 4x4..32x32 sizes, avx2 only for widths 32 and 64.
113 specialize qw/aom_dc_128_predictor_4x4 msa neon sse2/;
114 specialize qw/aom_dc_128_predictor_4x8 sse2/;
115 specialize qw/aom_dc_128_predictor_8x4 sse2/;
116 specialize qw/aom_dc_128_predictor_8x8 neon msa sse2/;
117 specialize qw/aom_dc_128_predictor_8x16 sse2/;
118 specialize qw/aom_dc_128_predictor_16x8 sse2/;
119 specialize qw/aom_dc_128_predictor_16x16 neon msa sse2/;
120 specialize qw/aom_dc_128_predictor_16x32 sse2/;
121 specialize qw/aom_dc_128_predictor_16x64 sse2/;
122 specialize qw/aom_dc_128_predictor_32x16 sse2 avx2/;
123 specialize qw/aom_dc_128_predictor_32x32 msa neon sse2 avx2/;
124 specialize qw/aom_dc_128_predictor_32x64 sse2 avx2/;
125 specialize qw/aom_dc_128_predictor_64x64 sse2 avx2/;
126 specialize qw/aom_dc_128_predictor_64x32 sse2 avx2/;
127 specialize qw/aom_dc_128_predictor_64x16 sse2 avx2/;
# v (vertical) predictor: sse2 for every listed size, neon/msa only on the
# square 4x4..32x32 sizes, avx2 only for widths 32 and 64.
128 specialize qw/aom_v_predictor_4x4 neon msa sse2/;
129 specialize qw/aom_v_predictor_4x8 sse2/;
130 specialize qw/aom_v_predictor_8x4 sse2/;
131 specialize qw/aom_v_predictor_8x8 neon msa sse2/;
132 specialize qw/aom_v_predictor_8x16 sse2/;
133 specialize qw/aom_v_predictor_16x8 sse2/;
134 specialize qw/aom_v_predictor_16x16 neon msa sse2/;
135 specialize qw/aom_v_predictor_16x32 sse2/;
136 specialize qw/aom_v_predictor_16x64 sse2/;
137 specialize qw/aom_v_predictor_32x16 sse2 avx2/;
138 specialize qw/aom_v_predictor_32x32 neon msa sse2 avx2/;
139 specialize qw/aom_v_predictor_32x64 sse2 avx2/;
140 specialize qw/aom_v_predictor_64x64 sse2 avx2/;
141 specialize qw/aom_v_predictor_64x32 sse2 avx2/;
142 specialize qw/aom_v_predictor_64x16 sse2 avx2/;
# h (horizontal) predictor: sse2 for every listed size. Unlike the v list,
# avx2 is listed for 32x32 only, and the square 4x4..16x16 sizes also list
# dspr2. Note the 4x8 entry precedes 4x4 here.
143 specialize qw/aom_h_predictor_4x8 sse2/;
144 specialize qw/aom_h_predictor_4x4 neon dspr2 msa sse2/;
145 specialize qw/aom_h_predictor_8x4 sse2/;
146 specialize qw/aom_h_predictor_8x8 neon dspr2 msa sse2/;
147 specialize qw/aom_h_predictor_8x16 sse2/;
148 specialize qw/aom_h_predictor_16x8 sse2/;
149 specialize qw/aom_h_predictor_16x16 neon dspr2 msa sse2/;
150 specialize qw/aom_h_predictor_16x32 sse2/;
151 specialize qw/aom_h_predictor_16x64 sse2/;
152 specialize qw/aom_h_predictor_32x16 sse2/;
153 specialize qw/aom_h_predictor_32x32 neon msa sse2 avx2/;
154 specialize qw/aom_h_predictor_32x64 sse2/;
155 specialize qw/aom_h_predictor_64x64 sse2/;
156 specialize qw/aom_h_predictor_64x32 sse2/;
157 specialize qw/aom_h_predictor_64x16 sse2/;
# paeth predictor: every listed size has an ssse3 variant, and sizes with
# width 16 or more additionally list avx2. Each function is specialized exactly
# once. The 4x16, 16x4, 8x32 and 32x8 sizes are not listed.
158 specialize qw/aom_paeth_predictor_4x4 ssse3/;
159 specialize qw/aom_paeth_predictor_4x8 ssse3/;
160 specialize qw/aom_paeth_predictor_8x4 ssse3/;
161 specialize qw/aom_paeth_predictor_8x8 ssse3/;
162 specialize qw/aom_paeth_predictor_8x16 ssse3/;
163 specialize qw/aom_paeth_predictor_16x8 ssse3 avx2/;
164 specialize qw/aom_paeth_predictor_16x16 ssse3 avx2/;
165 specialize qw/aom_paeth_predictor_16x32 ssse3 avx2/;
166 specialize qw/aom_paeth_predictor_16x64 ssse3 avx2/;
167 specialize qw/aom_paeth_predictor_32x16 ssse3 avx2/;
168 specialize qw/aom_paeth_predictor_32x32 ssse3 avx2/;
169 specialize qw/aom_paeth_predictor_32x64 ssse3 avx2/;
170 specialize qw/aom_paeth_predictor_64x32 ssse3 avx2/;
171 specialize qw/aom_paeth_predictor_64x64 ssse3 avx2/;
172 specialize qw/aom_paeth_predictor_64x16 ssse3 avx2/;
# smooth predictor: ssse3 is the only optimization listed, for every size
# here. Unlike the smooth_v / smooth_h lists, the 4x16, 16x4, 8x32 and 32x8
# sizes are not listed.
178 specialize qw/aom_smooth_predictor_4x4 ssse3/;
179 specialize qw/aom_smooth_predictor_4x8 ssse3/;
180 specialize qw/aom_smooth_predictor_8x4 ssse3/;
181 specialize qw/aom_smooth_predictor_8x8 ssse3/;
182 specialize qw/aom_smooth_predictor_8x16 ssse3/;
183 specialize qw/aom_smooth_predictor_16x8 ssse3/;
184 specialize qw/aom_smooth_predictor_16x16 ssse3/;
185 specialize qw/aom_smooth_predictor_16x32 ssse3/;
186 specialize qw/aom_smooth_predictor_16x64 ssse3/;
187 specialize qw/aom_smooth_predictor_32x16 ssse3/;
188 specialize qw/aom_smooth_predictor_32x32 ssse3/;
189 specialize qw/aom_smooth_predictor_32x64 ssse3/;
190 specialize qw/aom_smooth_predictor_64x64 ssse3/;
191 specialize qw/aom_smooth_predictor_64x32 ssse3/;
192 specialize qw/aom_smooth_predictor_64x16 ssse3/;
# smooth_v predictor: ssse3 for every listed size, including the 1:4 / 4:1
# shapes 4x16, 8x32, 16x4 and 32x8.
194 specialize qw/aom_smooth_v_predictor_4x4 ssse3/;
195 specialize qw/aom_smooth_v_predictor_4x8 ssse3/;
196 specialize qw/aom_smooth_v_predictor_4x16 ssse3/;
197 specialize qw/aom_smooth_v_predictor_8x4 ssse3/;
198 specialize qw/aom_smooth_v_predictor_8x8 ssse3/;
199 specialize qw/aom_smooth_v_predictor_8x16 ssse3/;
200 specialize qw/aom_smooth_v_predictor_8x32 ssse3/;
201 specialize qw/aom_smooth_v_predictor_16x4 ssse3/;
202 specialize qw/aom_smooth_v_predictor_16x8 ssse3/;
203 specialize qw/aom_smooth_v_predictor_16x16 ssse3/;
204 specialize qw/aom_smooth_v_predictor_16x32 ssse3/;
205 specialize qw/aom_smooth_v_predictor_16x64 ssse3/;
206 specialize qw/aom_smooth_v_predictor_32x8 ssse3/;
207 specialize qw/aom_smooth_v_predictor_32x16 ssse3/;
208 specialize qw/aom_smooth_v_predictor_32x32 ssse3/;
209 specialize qw/aom_smooth_v_predictor_32x64 ssse3/;
210 specialize qw/aom_smooth_v_predictor_64x64 ssse3/;
211 specialize qw/aom_smooth_v_predictor_64x32 ssse3/;
212 specialize qw/aom_smooth_v_predictor_64x16 ssse3/;
# smooth_h predictor: ssse3 for every listed size; the size list matches the
# smooth_v list, including 4x16, 8x32, 16x4 and 32x8.
214 specialize qw/aom_smooth_h_predictor_4x4 ssse3/;
215 specialize qw/aom_smooth_h_predictor_4x8 ssse3/;
216 specialize qw/aom_smooth_h_predictor_4x16 ssse3/;
217 specialize qw/aom_smooth_h_predictor_8x4 ssse3/;
218 specialize qw/aom_smooth_h_predictor_8x8 ssse3/;
219 specialize qw/aom_smooth_h_predictor_8x16 ssse3/;
220 specialize qw/aom_smooth_h_predictor_8x32 ssse3/;
221 specialize qw/aom_smooth_h_predictor_16x4 ssse3/;
222 specialize qw/aom_smooth_h_predictor_16x8 ssse3/;
223 specialize qw/aom_smooth_h_predictor_16x16 ssse3/;
224 specialize qw/aom_smooth_h_predictor_16x32 ssse3/;
225 specialize qw/aom_smooth_h_predictor_16x64 ssse3/;
226 specialize qw/aom_smooth_h_predictor_32x8 ssse3/;
227 specialize qw/aom_smooth_h_predictor_32x16 ssse3/;
228 specialize qw/aom_smooth_h_predictor_32x32 ssse3/;
229 specialize qw/aom_smooth_h_predictor_32x64 ssse3/;
230 specialize qw/aom_smooth_h_predictor_64x64 ssse3/;
231 specialize qw/aom_smooth_h_predictor_64x32 ssse3/;
232 specialize qw/aom_smooth_h_predictor_64x16 ssse3/;
234 # TODO(yunqingwang): optimize rectangular DC_PRED to replace division
235 # by multiply and shift.
# dc predictor: sse2 for every listed size; dspr2 only on the square
# 4x4..16x16 sizes, neon/msa only on the square 4x4..32x32 sizes, and avx2
# only for widths 32 and 64.
236 specialize qw/aom_dc_predictor_4x4 dspr2 msa neon sse2/;
237 specialize qw/aom_dc_predictor_4x8 sse2/;
238 specialize qw/aom_dc_predictor_8x4 sse2/;
239 specialize qw/aom_dc_predictor_8x8 dspr2 neon msa sse2/;
240 specialize qw/aom_dc_predictor_8x16 sse2/;
241 specialize qw/aom_dc_predictor_16x8 sse2/;
242 specialize qw/aom_dc_predictor_16x16 dspr2 neon msa sse2/;
243 specialize qw/aom_dc_predictor_16x32 sse2/;
244 specialize qw/aom_dc_predictor_16x64 sse2/;
245 specialize qw/aom_dc_predictor_32x16 sse2 avx2/;
246 specialize qw/aom_dc_predictor_32x32 msa neon sse2 avx2/;
247 specialize qw/aom_dc_predictor_32x64 sse2 avx2/;
248 specialize qw/aom_dc_predictor_64x64 sse2 avx2/;
249 specialize qw/aom_dc_predictor_64x32 sse2 avx2/;
250 specialize qw/aom_dc_predictor_64x16 sse2 avx2/;
# highbd v predictor: sse2 is the only optimization listed, for the 1:1, 1:2
# and 2:1 sizes from 4x4 up to 32x32.
252 specialize qw/aom_highbd_v_predictor_4x4 sse2/;
253 specialize qw/aom_highbd_v_predictor_4x8 sse2/;
254 specialize qw/aom_highbd_v_predictor_8x4 sse2/;
255 specialize qw/aom_highbd_v_predictor_8x8 sse2/;
256 specialize qw/aom_highbd_v_predictor_8x16 sse2/;
257 specialize qw/aom_highbd_v_predictor_16x8 sse2/;
258 specialize qw/aom_highbd_v_predictor_16x16 sse2/;
259 specialize qw/aom_highbd_v_predictor_16x32 sse2/;
260 specialize qw/aom_highbd_v_predictor_32x16 sse2/;
261 specialize qw/aom_highbd_v_predictor_32x32 sse2/;
263 # TODO(yunqingwang): optimize rectangular DC_PRED to replace division
264 # by multiply and shift.
# highbd dc predictor: sse2 is the only optimization listed, for the 1:1, 1:2
# and 2:1 sizes from 4x4 up to 32x32.
265 specialize qw/aom_highbd_dc_predictor_4x4 sse2/;
266 specialize qw/aom_highbd_dc_predictor_4x8 sse2/;
267 specialize qw/aom_highbd_dc_predictor_8x4 sse2/;
268 specialize qw/aom_highbd_dc_predictor_8x8 sse2/;
269 specialize qw/aom_highbd_dc_predictor_8x16 sse2/;
270 specialize qw/aom_highbd_dc_predictor_16x8 sse2/;
271 specialize qw/aom_highbd_dc_predictor_16x16 sse2/;
272 specialize qw/aom_highbd_dc_predictor_16x32 sse2/;
273 specialize qw/aom_highbd_dc_predictor_32x16 sse2/;
274 specialize qw/aom_highbd_dc_predictor_32x32 sse2/;
# highbd h predictor: sse2 is the only optimization listed, for the 1:1, 1:2
# and 2:1 sizes from 4x4 up to 32x32.
276 specialize qw/aom_highbd_h_predictor_4x4 sse2/;
277 specialize qw/aom_highbd_h_predictor_4x8 sse2/;
278 specialize qw/aom_highbd_h_predictor_8x4 sse2/;
279 specialize qw/aom_highbd_h_predictor_8x8 sse2/;
280 specialize qw/aom_highbd_h_predictor_8x16 sse2/;
281 specialize qw/aom_highbd_h_predictor_16x8 sse2/;
282 specialize qw/aom_highbd_h_predictor_16x16 sse2/;
283 specialize qw/aom_highbd_h_predictor_16x32 sse2/;
284 specialize qw/aom_highbd_h_predictor_32x16 sse2/;
285 specialize qw/aom_highbd_h_predictor_32x32 sse2/;
# highbd dc_left / dc_top / dc_128 predictors, grouped by size (three lines
# per size, in the order left, top, 128). sse2 is the only optimization
# listed, for the 1:1, 1:2 and 2:1 sizes from 4x4 up to 32x32.
286 specialize qw/aom_highbd_dc_left_predictor_4x4 sse2/;
287 specialize qw/aom_highbd_dc_top_predictor_4x4 sse2/;
288 specialize qw/aom_highbd_dc_128_predictor_4x4 sse2/;
289 specialize qw/aom_highbd_dc_left_predictor_4x8 sse2/;
290 specialize qw/aom_highbd_dc_top_predictor_4x8 sse2/;
291 specialize qw/aom_highbd_dc_128_predictor_4x8 sse2/;
292 specialize qw/aom_highbd_dc_left_predictor_8x4 sse2/;
293 specialize qw/aom_highbd_dc_top_predictor_8x4 sse2/;
294 specialize qw/aom_highbd_dc_128_predictor_8x4 sse2/;
295 specialize qw/aom_highbd_dc_left_predictor_8x8 sse2/;
296 specialize qw/aom_highbd_dc_top_predictor_8x8 sse2/;
297 specialize qw/aom_highbd_dc_128_predictor_8x8 sse2/;
298 specialize qw/aom_highbd_dc_left_predictor_8x16 sse2/;
299 specialize qw/aom_highbd_dc_top_predictor_8x16 sse2/;
300 specialize qw/aom_highbd_dc_128_predictor_8x16 sse2/;
301 specialize qw/aom_highbd_dc_left_predictor_16x8 sse2/;
302 specialize qw/aom_highbd_dc_top_predictor_16x8 sse2/;
303 specialize qw/aom_highbd_dc_128_predictor_16x8 sse2/;
304 specialize qw/aom_highbd_dc_left_predictor_16x16 sse2/;
305 specialize qw/aom_highbd_dc_top_predictor_16x16 sse2/;
306 specialize qw/aom_highbd_dc_128_predictor_16x16 sse2/;
307 specialize qw/aom_highbd_dc_left_predictor_16x32 sse2/;
308 specialize qw/aom_highbd_dc_top_predictor_16x32 sse2/;
309 specialize qw/aom_highbd_dc_128_predictor_16x32 sse2/;
310 specialize qw/aom_highbd_dc_left_predictor_32x16 sse2/;
311 specialize qw/aom_highbd_dc_top_predictor_32x16 sse2/;
312 specialize qw/aom_highbd_dc_128_predictor_32x16 sse2/;
313 specialize qw/aom_highbd_dc_left_predictor_32x32 sse2/;
314 specialize qw/aom_highbd_dc_top_predictor_32x32 sse2/;
315 specialize qw/aom_highbd_dc_128_predictor_32x32 sse2/;
318 # Sub Pixel Filters
# The three 8-bit convolve prototypes share one argument list; the highbd
# variants add a trailing "int bps". $avx2_ssse3 and $sse2_x86_64 are passed
# as extra specialize arguments and hold either an optimization name or the
# empty string, depending on the configuration checks at the top of the file.
320 add_proto qw/void aom_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
321 add_proto qw/void aom_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
322 add_proto qw/void aom_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
324 specialize qw/aom_convolve_copy sse2 /;
325 specialize qw/aom_convolve8_horiz sse2 ssse3/, "$avx2_ssse3";
326 specialize qw/aom_convolve8_vert sse2 ssse3/, "$avx2_ssse3";
328 # TODO(any): These need to be extended to up to 128x128 block sizes
# The neon/dspr2/msa variants below are only added when CONFIG_AV1 is not
# "yes".
329 if (!(aom_config("CONFIG_AV1") eq "yes")) {
330 specialize qw/aom_convolve_copy neon dspr2 msa/;
331 specialize qw/aom_convolve8_horiz neon dspr2 msa/;
332 specialize qw/aom_convolve8_vert neon dspr2 msa/;
335 add_proto qw/void aom_highbd_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
336 specialize qw/aom_highbd_convolve_copy sse2 avx2/;
338 add_proto qw/void aom_highbd_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
339 specialize qw/aom_highbd_convolve8_horiz avx2/, "$sse2_x86_64";
341 add_proto qw/void aom_highbd_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
342 specialize qw/aom_highbd_convolve8_vert avx2/, "$sse2_x86_64";
345 # Loopfilter
# 8-bit loop filter prototypes (uint8_t *s). Every function with a specialize
# line lists sse2 only. The *_8_dual and *_4_dual functions take two sets of
# blimit/limit/thresh pointers and have no specialize line in this section.
347 add_proto qw/void aom_lpf_vertical_14/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
348 specialize qw/aom_lpf_vertical_14 sse2/;
350 add_proto qw/void aom_lpf_vertical_14_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
351 specialize qw/aom_lpf_vertical_14_dual sse2/;
353 add_proto qw/void aom_lpf_vertical_6/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
354 specialize qw/aom_lpf_vertical_6 sse2/;
356 add_proto qw/void aom_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
357 specialize qw/aom_lpf_vertical_8 sse2/;
359 add_proto qw/void aom_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
361 add_proto qw/void aom_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
362 specialize qw/aom_lpf_vertical_4 sse2/;
364 add_proto qw/void aom_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
366 add_proto qw/void aom_lpf_horizontal_14/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
367 specialize qw/aom_lpf_horizontal_14 sse2/;
369 add_proto qw/void aom_lpf_horizontal_14_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
370 specialize qw/aom_lpf_horizontal_14_dual sse2/;
372 add_proto qw/void aom_lpf_horizontal_6/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
373 specialize qw/aom_lpf_horizontal_6 sse2/;
375 add_proto qw/void aom_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
376 specialize qw/aom_lpf_horizontal_8 sse2/;
378 add_proto qw/void aom_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
380 add_proto qw/void aom_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
381 specialize qw/aom_lpf_horizontal_4 sse2/;
383 add_proto qw/void aom_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
# High bit-depth loop filter prototypes (uint16_t *s plus a trailing
# "int bd"). Every function here has a specialize line: the single-edge
# variants list sse2, the *_dual variants list sse2 and avx2.
385 add_proto qw/void aom_highbd_lpf_vertical_14/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
386 specialize qw/aom_highbd_lpf_vertical_14 sse2/;
388 add_proto qw/void aom_highbd_lpf_vertical_14_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
389 specialize qw/aom_highbd_lpf_vertical_14_dual sse2 avx2/;
391 add_proto qw/void aom_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
392 specialize qw/aom_highbd_lpf_vertical_8 sse2/;
394 add_proto qw/void aom_highbd_lpf_vertical_6/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
395 specialize qw/aom_highbd_lpf_vertical_6 sse2/;
397 add_proto qw/void aom_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
398 specialize qw/aom_highbd_lpf_vertical_8_dual sse2 avx2/;
400 add_proto qw/void aom_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
401 specialize qw/aom_highbd_lpf_vertical_4 sse2/;
403 add_proto qw/void aom_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
404 specialize qw/aom_highbd_lpf_vertical_4_dual sse2 avx2/;
406 add_proto qw/void aom_highbd_lpf_horizontal_14/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
407 specialize qw/aom_highbd_lpf_horizontal_14 sse2/;
409 add_proto qw/void aom_highbd_lpf_horizontal_14_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
410 specialize qw/aom_highbd_lpf_horizontal_14_dual sse2 avx2/;
412 add_proto qw/void aom_highbd_lpf_horizontal_6/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
413 specialize qw/aom_highbd_lpf_horizontal_6 sse2/;
415 add_proto qw/void aom_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
416 specialize qw/aom_highbd_lpf_horizontal_8 sse2/;
418 add_proto qw/void aom_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
419 specialize qw/aom_highbd_lpf_horizontal_8_dual sse2 avx2/;
421 add_proto qw/void aom_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
422 specialize qw/aom_highbd_lpf_horizontal_4 sse2/;
424 add_proto qw/void aom_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
425 specialize qw/aom_highbd_lpf_horizontal_4_dual sse2 avx2/;
427 # Helper functions.
# av1_round_shift_array takes an int32_t array, its size and a bit count;
# sse4_1 is the only optimization listed.
428 add_proto qw/void av1_round_shift_array/, "int32_t *arr, int size, int bit";
429 specialize "av1_round_shift_array", qw/sse4_1/;
432 # Encoder functions.
436 # Forward transform
# Forward DCT prototypes, declared only when CONFIG_AV1_ENCODER is "yes".
# All take an int16_t input, a tran_low_t output and a stride. aom_fdct8x8
# additionally passes $ssse3_x86_64, which is 'ssse3' on x86_64 and the empty
# string elsewhere.
438 if (aom_config("CONFIG_AV1_ENCODER") eq "yes"){
439 add_proto qw/void aom_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
440 specialize qw/aom_fdct4x4 sse2/;
442 add_proto qw/void aom_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
443 specialize qw/aom_fdct4x4_1 sse2/;
445 add_proto qw/void aom_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
446 specialize qw/aom_fdct8x8 sse2/, "$ssse3_x86_64";
448 # High bit depth
449 add_proto qw/void aom_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
450 specialize qw/aom_highbd_fdct4x4 sse2/;
452 add_proto qw/void aom_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
453 specialize qw/aom_highbd_fdct8x8 sse2/;
455 } # CONFIG_AV1_ENCODER
458 # Inverse transform
# Two highbd inverse WHT prototypes, declared only when CONFIG_AV1 is "yes".
# Neither has a specialize line in this section.
459 if (aom_config("CONFIG_AV1") eq "yes") {
460 add_proto qw/void aom_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
462 add_proto qw/void aom_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
463 } # CONFIG_AV1
466 # Quantization
# Quantizer prototypes, declared only when CONFIG_AV1_ENCODER is "yes"; all six
# share the same argument list. aom_quantize_b_32x32 lists only the
# x86_64-gated variables ($ssse3_x86_64, $avx_x86_64), and the two *_64x64
# functions have no specialize line in this section.
# NOTE(review): the two consecutive blocks below test the same condition and
# look mergeable -- confirm before changing.
468 if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
469 add_proto qw/void aom_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
470 specialize qw/aom_quantize_b sse2/, "$ssse3_x86_64", "$avx_x86_64";
472 add_proto qw/void aom_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
473 specialize qw/aom_quantize_b_32x32/, "$ssse3_x86_64", "$avx_x86_64";
475 add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
476 } # CONFIG_AV1_ENCODER
478 if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
479 add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
480 specialize qw/aom_highbd_quantize_b sse2 avx2/;
482 add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
483 specialize qw/aom_highbd_quantize_b_32x32 sse2/;
485 add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
487 } # CONFIG_AV1_ENCODER
# Mask-blending prototypes, declared only when CONFIG_AV1 is "yes". The
# mask/hmask/vmask functions (8-bit and highbd) list sse4_1; the two *_d16_mask
# functions have no specialize line in this section.
488 if (aom_config("CONFIG_AV1") eq "yes") {
490 # Alpha blending with mask
492 add_proto qw/void aom_lowbd_blend_a64_d16_mask/, "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx, ConvolveParams *conv_params";
493 add_proto qw/void aom_highbd_blend_a64_d16_mask/, "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx, ConvolveParams *conv_params, const int bd";
494 add_proto qw/void aom_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx";
495 add_proto qw/void aom_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w";
496 add_proto qw/void aom_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w";
497 specialize "aom_blend_a64_mask", qw/sse4_1/;
498 specialize "aom_blend_a64_hmask", qw/sse4_1/;
499 specialize "aom_blend_a64_vmask", qw/sse4_1/;
501 add_proto qw/void aom_highbd_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx, int bd";
502 add_proto qw/void aom_highbd_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w, int bd";
503 add_proto qw/void aom_highbd_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w, int bd";
504 specialize "aom_highbd_blend_a64_mask", qw/sse4_1/;
505 specialize "aom_highbd_blend_a64_hmask", qw/sse4_1/;
506 specialize "aom_highbd_blend_a64_vmask", qw/sse4_1/;
508 } # CONFIG_AV1
# Encoder-only helpers (CONFIG_AV1_ENCODER is "yes"): block subtraction and
# sum-of-squares.
# NOTE(review): the second CONFIG_AV1_ENCODER test below repeats the first; the
# closing braces are not visible in this listing, so it looks nested and
# redundant -- confirm against the full file.
510 if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
512 # Block subtraction
514 add_proto qw/void aom_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
515 specialize qw/aom_subtract_block neon msa sse2/;
517 if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
519 # Sum of Squares
521 add_proto qw/uint64_t aom_sum_squares_2d_i16/, "const int16_t *src, int stride, int width, int height";
522 specialize qw/aom_sum_squares_2d_i16 sse2/;
524 add_proto qw/uint64_t aom_sum_squares_i16/, "const int16_t *src, uint32_t N";
525 specialize qw/aom_sum_squares_i16 sse2/;
530 # Avg
# Encoder-only (CONFIG_AV1_ENCODER is "yes") prototypes for highbd block
# subtraction, 8x8 min/max, Hadamard transforms, SATD, integral projections
# and vector variance. aom_highbd_minmax_8x8 has no specialize line here.
532 if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
534 # Avg
# NOTE(review): no add_proto for aom_avg_8x8 is visible in this listing;
# confirm it is declared elsewhere, otherwise this specialize line is dead.
536 specialize qw/aom_avg_8x8 sse2 neon msa/;
537 add_proto qw/void aom_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride, int bd";
538 specialize qw/aom_highbd_subtract_block sse2/;
541 # Minmax
543 add_proto qw/void aom_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
544 specialize qw/aom_minmax_8x8 sse2 neon/;
545 add_proto qw/void aom_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
547 add_proto qw/void aom_hadamard_8x8/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
548 specialize qw/aom_hadamard_8x8 sse2 neon/, "$ssse3_x86_64";
550 add_proto qw/void aom_hadamard_16x16/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
551 specialize qw/aom_hadamard_16x16 sse2 neon/;
553 add_proto qw/int aom_satd/, "const int16_t *coeff, int length";
554 specialize qw/aom_satd sse2 neon/;
556 add_proto qw/void aom_int_pro_row/, "int16_t *hbuf, const uint8_t *ref, int ref_stride, int height";
557 specialize qw/aom_int_pro_row sse2 neon/;
559 add_proto qw/int16_t aom_int_pro_col/, "const uint8_t *ref, int width";
560 specialize qw/aom_int_pro_col sse2 neon/;
562 add_proto qw/int aom_vector_var/, "const int16_t *ref, const int16_t *src, int bwl";
563 specialize qw/aom_vector_var neon sse2/;
564 } # CONFIG_AV1_ENCODER
567 # Single block SAD / Single block Avg SAD
# For every [w, h] in @block_sizes, three prototypes returning unsigned int
# are declared: aom_sad<w>x<h>, aom_sad<w>x<h>_avg (adds second_pred) and
# aom_jnt_sad<w>x<h>_avg (adds second_pred and a JNT_COMP_PARAMS pointer).
569 foreach (@block_sizes) {
570 ($w, $h) = @$_;
571 add_proto qw/unsigned int/, "aom_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
572 add_proto qw/unsigned int/, "aom_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
573 add_proto qw/unsigned int/, "aom_jnt_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param";
# SAD specializations, in four groups: plain SAD for the 1:1/1:2/2:1 sizes,
# the matching *_avg functions, plain and *_avg SAD for the 1:4 / 4:1 sizes
# (sse2 only), and finally the aom_jnt_sad*_avg functions (ssse3). In the
# first two groups avx2 is listed only from 32x16 upwards, and no msa variant
# is listed for the 128-wide or 128-high sizes.
576 specialize qw/aom_sad128x128 avx2 sse2/;
577 specialize qw/aom_sad128x64 avx2 sse2/;
578 specialize qw/aom_sad64x128 avx2 sse2/;
579 specialize qw/aom_sad64x64 avx2 neon msa sse2/;
580 specialize qw/aom_sad64x32 avx2 msa sse2/;
581 specialize qw/aom_sad32x64 avx2 msa sse2/;
582 specialize qw/aom_sad32x32 avx2 neon msa sse2/;
583 specialize qw/aom_sad32x16 avx2 msa sse2/;
584 specialize qw/aom_sad16x32 msa sse2/;
585 specialize qw/aom_sad16x16 neon msa sse2/;
586 specialize qw/aom_sad16x8 neon msa sse2/;
587 specialize qw/aom_sad8x16 neon msa sse2/;
588 specialize qw/aom_sad8x8 neon msa sse2/;
589 specialize qw/aom_sad8x4 msa sse2/;
590 specialize qw/aom_sad4x8 msa sse2/;
591 specialize qw/aom_sad4x4 neon msa sse2/;
593 specialize qw/aom_sad128x128_avg avx2 sse2/;
594 specialize qw/aom_sad128x64_avg avx2 sse2/;
595 specialize qw/aom_sad64x128_avg avx2 sse2/;
596 specialize qw/aom_sad64x64_avg avx2 msa sse2/;
597 specialize qw/aom_sad64x32_avg avx2 msa sse2/;
598 specialize qw/aom_sad32x64_avg avx2 msa sse2/;
599 specialize qw/aom_sad32x32_avg avx2 msa sse2/;
600 specialize qw/aom_sad32x16_avg avx2 msa sse2/;
601 specialize qw/aom_sad16x32_avg msa sse2/;
602 specialize qw/aom_sad16x16_avg msa sse2/;
603 specialize qw/aom_sad16x8_avg msa sse2/;
604 specialize qw/aom_sad8x16_avg msa sse2/;
605 specialize qw/aom_sad8x8_avg msa sse2/;
606 specialize qw/aom_sad8x4_avg msa sse2/;
607 specialize qw/aom_sad4x8_avg msa sse2/;
608 specialize qw/aom_sad4x4_avg msa sse2/;
610 specialize qw/aom_sad4x16 sse2/;
611 specialize qw/aom_sad16x4 sse2/;
612 specialize qw/aom_sad8x32 sse2/;
613 specialize qw/aom_sad32x8 sse2/;
614 specialize qw/aom_sad16x64 sse2/;
615 specialize qw/aom_sad64x16 sse2/;
617 specialize qw/aom_sad4x16_avg sse2/;
618 specialize qw/aom_sad16x4_avg sse2/;
619 specialize qw/aom_sad8x32_avg sse2/;
620 specialize qw/aom_sad32x8_avg sse2/;
621 specialize qw/aom_sad16x64_avg sse2/;
622 specialize qw/aom_sad64x16_avg sse2/;
624 specialize qw/aom_jnt_sad128x128_avg ssse3/;
625 specialize qw/aom_jnt_sad128x64_avg ssse3/;
626 specialize qw/aom_jnt_sad64x128_avg ssse3/;
627 specialize qw/aom_jnt_sad64x64_avg ssse3/;
628 specialize qw/aom_jnt_sad64x32_avg ssse3/;
629 specialize qw/aom_jnt_sad32x64_avg ssse3/;
630 specialize qw/aom_jnt_sad32x32_avg ssse3/;
631 specialize qw/aom_jnt_sad32x16_avg ssse3/;
632 specialize qw/aom_jnt_sad16x32_avg ssse3/;
633 specialize qw/aom_jnt_sad16x16_avg ssse3/;
634 specialize qw/aom_jnt_sad16x8_avg ssse3/;
635 specialize qw/aom_jnt_sad8x16_avg ssse3/;
636 specialize qw/aom_jnt_sad8x8_avg ssse3/;
637 specialize qw/aom_jnt_sad8x4_avg ssse3/;
638 specialize qw/aom_jnt_sad4x8_avg ssse3/;
639 specialize qw/aom_jnt_sad4x4_avg ssse3/;
641 specialize qw/aom_jnt_sad4x16_avg ssse3/;
642 specialize qw/aom_jnt_sad16x4_avg ssse3/;
643 specialize qw/aom_jnt_sad8x32_avg ssse3/;
644 specialize qw/aom_jnt_sad32x8_avg ssse3/;
645 specialize qw/aom_jnt_sad16x64_avg ssse3/;
646 specialize qw/aom_jnt_sad64x16_avg ssse3/;
# aom_sad<W>xh: one prototype per width, each taking explicit width and height
# arguments. All prototypes are registered first, then each one gets sse2.
my @sad_xh_widths = (4, 8, 16, 32, 64, 128);
foreach my $width (@sad_xh_widths) {
  add_proto(qw/unsigned int/, "aom_sad${width}xh", "const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, int width, int height");
}

foreach my $width (@sad_xh_widths) {
  specialize("aom_sad${width}xh", 'sse2');
}
# High bit-depth single block SAD: prototypes for every @block_sizes entry.
# sse2 is attached inside the loop unless either dimension is 128 or the
# width is 4.
foreach (@block_sizes) {
  ($w, $h) = @$_;
  my $hbd_sad_args = "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
  add_proto(qw/unsigned int/, "aom_highbd_sad${w}x${h}", $hbd_sad_args);
  add_proto(qw/unsigned int/, "aom_highbd_sad${w}x${h}_avg", "$hbd_sad_args, const uint8_t *second_pred");
  unless ($w == 128 || $h == 128 || $w == 4) {
    specialize("aom_highbd_sad${w}x${h}", 'sse2');
    specialize("aom_highbd_sad${w}x${h}_avg", 'sse2');
  }
  add_proto(qw/unsigned int/, "aom_highbd_jnt_sad${w}x${h}_avg", "$hbd_sad_args, const uint8_t *second_pred, const JNT_COMP_PARAMS* jcp_param");
}

# Explicit lists, first for the plain functions and then for the _avg ones.
foreach my $suffix ('', '_avg') {
  foreach my $size (qw/128x128 128x64 64x128/) {
    specialize("aom_highbd_sad${size}${suffix}", 'avx2');
  }
  foreach my $size (qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8/) {
    specialize("aom_highbd_sad${size}${suffix}", qw/avx2 sse2/);
  }
  specialize("aom_highbd_sad8x4${suffix}", 'sse2');
}

# 4:1 / 1:4 shapes with width other than 4: sse2 only, plain then _avg.
foreach my $suffix ('', '_avg') {
  foreach my $size (qw/16x4 8x32 32x8 16x64 64x16/) {
    specialize("aom_highbd_sad${size}${suffix}", 'sse2');
  }
}
#
# Masked SAD
#
# Two passes over @block_sizes: the 8-bit functions first, then the highbd
# ones (whose pointer arguments carry an "8" suffix). Each function gets ssse3
# right after its prototype.
foreach my $masked_sad_kind (
  [ 'aom_masked_sad',        "const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask" ],
  [ 'aom_highbd_masked_sad', "const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask" ],
) {
  my ($stem, $arg_list) = @$masked_sad_kind;
  foreach (@block_sizes) {
    ($w, $h) = @$_;
    my $fn = "${stem}${w}x${h}";
    add_proto(qw/unsigned int/, $fn, $arg_list);
    specialize($fn, 'ssse3');
  }
}
#
# OBMC SAD
#
# 8-bit functions for all block sizes first, then the highbd ones. sse4_1 is
# attached to everything except 128x32 and 32x128. NOTE(review): @block_sizes
# as built at the top of this file does not appear to contain those two
# shapes, so the guard looks like a no-op today - confirm.
foreach my $stem (qw/aom_obmc_sad aom_highbd_obmc_sad/) {
  foreach (@block_sizes) {
    ($w, $h) = @$_;
    my $fn = "${stem}${w}x${h}";
    add_proto(qw/unsigned int/, $fn, "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask");
    if (($w != 128 || $h != 32) && ($w != 32 || $h != 128)) {
      specialize($fn, 'sse4_1');
    }
  }
}
#
# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
#
# Every x3 / x8 function below shares this argument list.
my $sad_x3x8_args = "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";

# Blocks of 3: square sizes from @block_widths, then their optimizations.
foreach my $side (@block_widths) {
  add_proto('void', "aom_sad${side}x${side}x3", $sad_x3x8_args);
}
foreach my $x3_row (
  [ '64x64', qw/msa/ ],
  [ '32x32', qw/msa/ ],
  [ '16x16', qw/sse3 ssse3 msa/ ],
  [ '8x8',   qw/sse3 msa/ ],
  [ '4x4',   qw/sse3 msa/ ],
) {
  my ($size, @targets) = @$x3_row;
  specialize("aom_sad${size}x3", @targets);
}

# Non-square x3 functions: prototype immediately followed by its list.
foreach my $x3_row (
  [ '16x8', qw/sse3 ssse3 msa/ ],
  [ '8x16', qw/sse3 msa/ ],
) {
  my ($size, @targets) = @$x3_row;
  add_proto('void', "aom_sad${size}x3", $sad_x3x8_args);
  specialize("aom_sad${size}x3", @targets);
}

# Blocks of 8: same layout as above.
foreach my $side (@block_widths) {
  add_proto('void', "aom_sad${side}x${side}x8", $sad_x3x8_args);
}
foreach my $x8_row (
  [ '64x64', qw/msa/ ],
  [ '32x32', qw/msa/ ],
  [ '16x16', qw/sse4_1 msa/ ],
  [ '8x8',   qw/sse4_1 msa/ ],
  [ '4x4',   qw/sse4_1 msa/ ],
) {
  my ($size, @targets) = @$x8_row;
  specialize("aom_sad${size}x8", @targets);
}

foreach my $x8_row (
  [ '16x8', qw/sse4_1 msa/ ],
  [ '8x16', qw/sse4_1 msa/ ],
  [ '8x4',  qw/msa/ ],
  [ '4x8',  qw/msa/ ],
) {
  my ($size, @targets) = @$x8_row;
  add_proto('void', "aom_sad${size}x8", $sad_x3x8_args);
  specialize("aom_sad${size}x8", @targets);
}
# High bit-depth multi-block SAD prototypes (no optimizations are listed for
# any of them here). All share one argument list.
my $highbd_sad_x3x8_args = "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";

# Square sizes: the x3 (blocks of 3) and x8 (blocks of 8) prototypes are
# registered together for each width.
foreach my $side (@block_widths) {
  add_proto('void', "aom_highbd_sad${side}x${side}x3", $highbd_sad_x3x8_args);
  add_proto('void', "aom_highbd_sad${side}x${side}x8", $highbd_sad_x3x8_args);
}

# Non-square sizes: blocks of 3 first, then blocks of 8.
foreach my $shape (qw/16x8x3 8x16x3 16x8x8 8x16x8 8x4x8 4x8x8/) {
  add_proto('void', "aom_highbd_sad${shape}", $highbd_sad_x3x8_args);
}
#
# Multi-block SAD, comparing a reference to N independent blocks
#
# One x4d prototype per @block_sizes entry; ref_ptr is an array of pointers.
foreach (@block_sizes) {
  ($w, $h) = @$_;
  add_proto('void', "aom_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array");
}

# Optimizations per size, in the order they are passed to specialize().
foreach my $x4d_row (
  [ '128x128', qw/avx2 sse2/ ],
  [ '128x64',  qw/avx2 sse2/ ],
  [ '64x128',  qw/avx2 sse2/ ],
  [ '64x64',   qw/avx2 neon msa sse2/ ],
  [ '64x32',   qw/avx2 msa sse2/ ],
  [ '32x64',   qw/avx2 msa sse2/ ],
  [ '32x32',   qw/avx2 neon msa sse2/ ],
  [ '32x16',   qw/msa sse2/ ],
  [ '16x32',   qw/msa sse2/ ],
  [ '16x16',   qw/neon msa sse2/ ],
  [ '16x8',    qw/msa sse2/ ],
  [ '8x16',    qw/msa sse2/ ],
  [ '8x8',     qw/msa sse2/ ],
  [ '8x4',     qw/msa sse2/ ],
  [ '4x8',     qw/msa sse2/ ],
  [ '4x4',     qw/msa sse2/ ],
  [ '4x16',    qw/sse2/ ],
  [ '16x4',    qw/sse2/ ],
  [ '8x32',    qw/sse2/ ],
  [ '32x8',    qw/sse2/ ],
  [ '16x64',   qw/sse2/ ],
  [ '64x16',   qw/sse2/ ],
) {
  my ($size, @targets) = @$x4d_row;
  specialize("aom_sad${size}x4d", @targets);
}
#
# Multi-block SAD, comparing a reference to N independent blocks
# (high bit-depth functions)
#
# sse2 is attached inside the loop whenever neither dimension is 128; the
# explicit lists that follow name some of the same functions again.
foreach (@block_sizes) {
  ($w, $h) = @$_;
  my $fn = "aom_highbd_sad${w}x${h}x4d";
  add_proto('void', $fn, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array");
  specialize($fn, 'sse2') unless ($w == 128 || $h == 128);
}

foreach my $size (qw/128x128 128x64 64x128/) {
  specialize("aom_highbd_sad${size}x4d", 'avx2');
}
foreach my $size (qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8/) {
  specialize("aom_highbd_sad${size}x4d", qw/sse2 avx2/);
}
foreach my $size (
  qw/8x16 8x8 8x4 4x8 4x4/,
  qw/4x16 16x4 8x32 32x8 16x64 64x16/,
) {
  specialize("aom_highbd_sad${size}x4d", 'sse2');
}
#
# Structured Similarity (SSIM)
#
# Only registered when CONFIG_INTERNAL_STATS is "yes". $sse2_x86_64 is 'sse2'
# only when $opts{arch} is x86_64 and '' otherwise (set at the top of this file).
if (aom_config("CONFIG_INTERNAL_STATS") eq "yes") {
  my $ssim_sum_args = "uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";

  foreach my $ssim_fn (qw/aom_ssim_parms_8x8 aom_ssim_parms_16x16/) {
    add_proto('void', $ssim_fn, "const uint8_t *s, int sp, const uint8_t *r, int rp, $ssim_sum_args");
    specialize($ssim_fn, "$sse2_x86_64");
  }

  # The highbd variant reads uint16_t samples and has no optimization listed.
  add_proto('void', 'aom_highbd_ssim_parms_8x8', "const uint16_t *s, int sp, const uint16_t *r, int rp, $ssim_sum_args");
}
883 } # CONFIG_AV1_ENCODER
885 if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
#
# Specialty Variance
#
# The get*var functions report both sse and sum; the mse functions report sse
# only and name their reference stride "recon_stride".
my $getvar_args = "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
my $mse_args = "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
my @mse_sizes = qw/16x16 16x8 8x16 8x8/;

add_proto('void', 'aom_get16x16var', $getvar_args);
add_proto('void', 'aom_get8x8var', $getvar_args);

specialize('aom_get16x16var', qw/sse2 avx2 neon msa/);
specialize('aom_get8x8var', qw/sse2 neon msa/);

foreach my $size (@mse_sizes) {
  add_proto(qw/unsigned int/, "aom_mse${size}", $mse_args);
}

specialize('aom_mse16x16', qw/sse2 avx2 neon msa/);
foreach my $size (qw/16x8 8x16 8x8/) {
  specialize("aom_mse${size}", qw/sse2 msa/);
}

# High bit-depth counterparts, one set per bit depth; only the square mse
# functions get sse2.
foreach my $depth (8, 10, 12) {
  add_proto('void', "aom_highbd_${depth}_get16x16var", $getvar_args);
  add_proto('void', "aom_highbd_${depth}_get8x8var", $getvar_args);

  foreach my $size (@mse_sizes) {
    add_proto(qw/unsigned int/, "aom_highbd_${depth}_mse${size}", $mse_args);
  }

  specialize("aom_highbd_${depth}_mse16x16", 'sse2');
  specialize("aom_highbd_${depth}_mse8x8", 'sse2');
}
# Upsampled prediction helpers. Each row is: function name, argument list,
# optimization. A prototype is registered and then specialized before moving
# on to the next row. The comp_avg variants add a pred pointer, the jnt
# variants add a JNT_COMP_PARAMS pointer, and the highbd variants write
# uint16_t output and take a trailing bd argument.
foreach my $upsampled_row (
  [ 'aom_upsampled_pred',
    "uint8_t *comp_pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref, int ref_stride",
    'sse2' ],
  [ 'aom_comp_avg_upsampled_pred',
    "uint8_t *comp_pred, const uint8_t *pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref, int ref_stride",
    'sse2' ],
  [ 'aom_jnt_comp_avg_upsampled_pred',
    "uint8_t *comp_pred, const uint8_t *pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref, int ref_stride, const JNT_COMP_PARAMS *jcp_param",
    'ssse3' ],
  [ 'aom_highbd_upsampled_pred',
    "uint16_t *comp_pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref8, int ref_stride, int bd",
    'sse2' ],
  [ 'aom_highbd_comp_avg_upsampled_pred',
    "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref8, int ref_stride, int bd",
    'sse2' ],
  [ 'aom_highbd_jnt_comp_avg_upsampled_pred',
    "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref8, int ref_stride, int bd, const JNT_COMP_PARAMS *jcp_param",
    'sse2' ],
) {
  my ($fn, $arg_list, $target) = @$upsampled_row;
  add_proto('void', $fn, $arg_list);
  specialize($fn, $target);
}
# Both prototypes are registered before either optimization list is attached.
# aom_get_mb_ss takes a single unnamed int16_t pointer.
add_proto('unsigned', 'int', 'aom_get_mb_ss', "const int16_t *");
add_proto('unsigned', 'int', 'aom_get4x4sse_cs', "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride");

specialize('aom_get_mb_ss', 'sse2', 'msa');
specialize('aom_get4x4sse_cs', 'neon', 'msa');
#
# Variance / Subpixel Variance / Subpixel Avg Variance
#
my $variance_args = "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
my $subpel_variance_args = "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";

# Sub-4x4 sizes only get a plain variance prototype.
foreach my $tiny (qw/2x2 2x4 4x2/) {
  add_proto(qw/unsigned int/, "aom_variance${tiny}", $variance_args);
}

# Four prototypes per @block_sizes entry: variance, sub-pixel variance,
# sub-pixel avg variance (adds second_pred) and the jnt form (adds jcp_param).
foreach (@block_sizes) {
  ($w, $h) = @$_;
  add_proto(qw/unsigned int/, "aom_variance${w}x${h}", $variance_args);
  add_proto('uint32_t', "aom_sub_pixel_variance${w}x${h}", $subpel_variance_args);
  add_proto('uint32_t', "aom_sub_pixel_avg_variance${w}x${h}", "$subpel_variance_args, const uint8_t *second_pred");
  add_proto('uint32_t', "aom_jnt_sub_pixel_avg_variance${w}x${h}", "$subpel_variance_args, const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param");
}

# aom_variance<size>: size followed by the list passed to specialize().
foreach my $variance_row (
  [ '128x128', qw/sse2 avx2/ ],
  [ '128x64',  qw/sse2 avx2/ ],
  [ '64x128',  qw/sse2 avx2/ ],
  [ '64x64',   qw/sse2 avx2 neon msa/ ],
  [ '64x32',   qw/sse2 avx2 neon msa/ ],
  [ '32x64',   qw/sse2 neon msa/ ],
  [ '32x32',   qw/sse2 avx2 neon msa/ ],
  [ '32x16',   qw/sse2 avx2 msa/ ],
  [ '16x32',   qw/sse2 msa/ ],
  [ '16x16',   qw/sse2 avx2 neon msa/ ],
  [ '16x8',    qw/sse2 neon msa/ ],
  [ '8x16',    qw/sse2 neon msa/ ],
  [ '8x8',     qw/sse2 neon msa/ ],
  [ '8x4',     qw/sse2 msa/ ],
  [ '4x8',     qw/sse2 msa/ ],
  [ '4x4',     qw/sse2 msa/ ],
) {
  my ($size, @targets) = @$variance_row;
  specialize("aom_variance${size}", @targets);
}

# aom_sub_pixel_variance<size>: every size ends with "msa sse2 ssse3"; the
# row lists whatever precedes that tail.
foreach my $subpel_row (
  [ '64x64', qw/avx2 neon/ ],
  [ '64x32' ],
  [ '32x64' ],
  [ '32x32', qw/avx2 neon/ ],
  [ '32x16' ],
  [ '16x32' ],
  [ '16x16', qw/neon/ ],
  [ '16x8' ],
  [ '8x16' ],
  [ '8x8',   qw/neon/ ],
  [ '8x4' ],
  [ '4x8' ],
  [ '4x4' ],
) {
  my ($size, @leading) = @$subpel_row;
  specialize("aom_sub_pixel_variance${size}", @leading, qw/msa sse2 ssse3/);
}

# aom_sub_pixel_avg_variance<size>: same tail, avx2 in front for 64x64 and 32x32.
foreach my $subpel_avg_row (
  [ '64x64', qw/avx2/ ],
  [ '64x32' ],
  [ '32x64' ],
  [ '32x32', qw/avx2/ ],
  [ '32x16' ],
  [ '16x32' ],
  [ '16x16' ],
  [ '16x8' ],
  [ '8x16' ],
  [ '8x8' ],
  [ '8x4' ],
  [ '4x8' ],
  [ '4x4' ],
) {
  my ($size, @leading) = @$subpel_avg_row;
  specialize("aom_sub_pixel_avg_variance${size}", @leading, qw/msa sse2 ssse3/);
}

# 4:1 / 1:4 shapes: variance first, then sub-pixel, then sub-pixel avg.
my @variance_ext_sizes = qw/4x16 16x4 8x32 32x8 16x64 64x16/;
foreach my $size (@variance_ext_sizes) {
  specialize("aom_variance${size}", 'sse2');
}
foreach my $size (@variance_ext_sizes) {
  specialize("aom_sub_pixel_variance${size}", qw/sse2 ssse3/);
}
foreach my $size (@variance_ext_sizes) {
  specialize("aom_sub_pixel_avg_variance${size}", qw/sse2 ssse3/);
}

# Every listed aom_jnt_sub_pixel_avg_variance<size> gets ssse3.
foreach my $size (
  qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8 8x16 8x8 8x4 4x8 4x4/,
  qw/4x16 16x4 8x32 32x8 16x64 64x16/,
  qw/128x128 128x64 64x128/,
) {
  specialize("aom_jnt_sub_pixel_avg_variance${size}", 'ssse3');
}
# High bit-depth variance family, generated once per bit depth (8, 10, 12).
foreach my $depth (8, 10, 12) {
  # Sub-4x4 sizes only get a plain variance prototype.
  foreach my $tiny (qw/2x2 2x4 4x2/) {
    add_proto(qw/unsigned int/, "aom_highbd_${depth}_variance${tiny}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse");
  }

  foreach (@block_sizes) {
    ($w, $h) = @$_;
    my $variance_fn   = "aom_highbd_${depth}_variance${w}x${h}";
    my $subpel_fn     = "aom_highbd_${depth}_sub_pixel_variance${w}x${h}";
    my $subpel_avg_fn = "aom_highbd_${depth}_sub_pixel_avg_variance${w}x${h}";
    my $subpel_args   = "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
    my $no_128_side   = ($w != 128 && $h != 128);
    my $is_4x4        = ($w == 4 && $h == 4);

    add_proto(qw/unsigned int/, $variance_fn, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse");
    add_proto('uint32_t', $subpel_fn, $subpel_args);
    add_proto('uint32_t', $subpel_avg_fn, "$subpel_args, const uint8_t *second_pred");

    # Plain variance: sse2 unless a side is 128 or 4; 4x4 gets sse4_1 instead.
    specialize($variance_fn, "sse2") if ($no_128_side && $w != 4 && $h != 4);
    # TODO(david.barker): When ext-partition-types is enabled, we currently
    # don't have vectorized 4x16 highbd variance functions
    specialize($variance_fn, "sse4_1") if $is_4x4;

    # Sub-pixel variants: sse2 unless a side is 128 or the width is 4.
    if ($no_128_side && $w != 4) {
      specialize($subpel_fn, 'sse2');
      specialize($subpel_avg_fn, 'sse2');
    }
    if ($is_4x4) {
      specialize($subpel_fn, "sse4_1");
      specialize($subpel_avg_fn, "sse4_1");
    }

    # The jnt prototype has no optimization listed.
    add_proto('uint32_t', "aom_highbd_${depth}_jnt_sub_pixel_avg_variance${w}x${h}", "$subpel_args, const uint8_t *second_pred, const JNT_COMP_PARAMS* jcp_param");
  }
}
#
# Masked Variance / Masked Subpixel Variance
#
# The 8-bit functions come first, then one highbd set per bit depth (8, 10,
# 12). All share one argument list and each gets ssse3 right after its
# prototype.
foreach my $stem (qw/aom_ aom_highbd_8_ aom_highbd_10_ aom_highbd_12_/) {
  foreach (@block_sizes) {
    ($w, $h) = @$_;
    my $fn = "${stem}masked_sub_pixel_variance${w}x${h}";
    add_proto(qw/unsigned int/, $fn, "const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse");
    specialize($fn, 'ssse3');
  }
}
#
# OBMC Variance / OBMC Subpixel Variance
#
# The 8-bit functions come first, then the highbd sets. The first highbd set
# has no bit-depth number in its name (aom_highbd_obmc_...), followed by the
# _10_ and _12_ sets. Only the plain variance function gets sse4_1; the
# sub-pixel one has no optimization listed.
foreach my $stem (qw/aom_ aom_highbd_ aom_highbd_10_ aom_highbd_12_/) {
  foreach (@block_sizes) {
    ($w, $h) = @$_;
    my $variance_fn = "${stem}obmc_variance${w}x${h}";
    add_proto(qw/unsigned int/, $variance_fn, "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse");
    add_proto(qw/unsigned int/, "${stem}obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse");
    specialize($variance_fn, 'sse4_1');
  }
}
# Explicit aom_sub_pixel_avg_variance prototypes, each followed by its
# optimization list. NOTE(review): the same names, argument list and lists
# are already issued earlier in this file by the @block_sizes loop and the
# specialize calls that follow it, so this looks redundant - confirm how
# add_proto handles a repeated name before removing.
foreach my $avg_row (
  [ '64x64', qw/avx2/ ],
  [ '64x32' ],
  [ '32x64' ],
  [ '32x32', qw/avx2/ ],
  [ '32x16' ],
  [ '16x32' ],
  [ '16x16' ],
  [ '16x8' ],
  [ '8x16' ],
  [ '8x8' ],
  [ '8x4' ],
  [ '4x8' ],
  [ '4x4' ],
) {
  my ($size, @leading) = @$avg_row;
  my $fn = "aom_sub_pixel_avg_variance${size}";
  add_proto('uint32_t', $fn, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred");
  specialize($fn, @leading, qw/msa sse2 ssse3/);
}
# Specialty Subpixel
# Half-pixel 16x16 variance: one prototype per suffix (_h, _v, _hv), each
# with the same argument list and each listing sse2.
foreach my $suffix (qw/h v hv/) {
  my $fn = "aom_variance_halfpixvar16x16_${suffix}";
  add_proto('uint32_t', $fn, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse");
  specialize($fn, 'sse2');
}
# Comp Avg
# Compound-average predictor: prototype only, no specialize call.
add_proto('void', 'aom_comp_avg_pred', "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride");

# The jnt variant takes an extra JNT_COMP_PARAMS pointer and lists ssse3.
add_proto('void', 'aom_jnt_comp_avg_pred', "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride, const JNT_COMP_PARAMS *jcp_param");
specialize('aom_jnt_comp_avg_pred', 'ssse3');
# High bit-depth variance, registered for bit depths 12, 10 and 8 (in that
# order) over the same list of block sizes.
foreach my $bd (12, 10, 8) {
  # Sizes that receive a prototype but no specialize call.
  my %proto_only = map { $_ => 1 } qw/8x4 4x8 4x4/;

  foreach my $dims (qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8 8x16 8x8 8x4 4x8 4x4/) {
    my $fn = "aom_highbd_${bd}_variance${dims}";
    add_proto(qw/unsigned int/, $fn, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse");
    specialize($fn, 'sse2') unless $proto_only{$dims};
  }
}
# High bit-depth getNxNvar helpers: prototypes only (no specialize calls),
# for bit depths 8, 10 and 12 and block sizes 16x16 and 8x8. Besides sse,
# these also take an int *sum argument.
foreach my $bd (8, 10, 12) {
  foreach my $dims (qw/16x16 8x8/) {
    add_proto('void', "aom_highbd_${bd}_get${dims}var", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum");
  }
}
# High bit-depth MSE for bit depths 8, 10 and 12. Four block sizes get a
# prototype; only the square ones (16x16 and 8x8) list sse2.
foreach my $bd (8, 10, 12) {
  foreach my $dims (qw/16x16 16x8 8x16 8x8/) {
    my $fn = "aom_highbd_${bd}_mse${dims}";
    add_proto(qw/unsigned int/, $fn, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse");
    specialize($fn, 'sse2') if $dims eq '16x16' || $dims eq '8x8';
  }
}
# High bit-depth compound-average predictor: prototype only.
add_proto('void', 'aom_highbd_comp_avg_pred', "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride");

# The jnt variant takes an extra JNT_COMP_PARAMS pointer; the only target
# it lists is c.
add_proto('void', 'aom_highbd_jnt_comp_avg_pred', "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride, const JNT_COMP_PARAMS *jcp_param");
specialize('aom_highbd_jnt_comp_avg_pred', 'c');
# Subpixel Variance
# High bit-depth sub-pixel variance, registered for bit depths 12, 10 and 8
# (in that order). Every size lists sse2 except 4x8 and 4x4, which only get
# a prototype.
foreach my $bd (12, 10, 8) {
  foreach my $dims (qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8 8x16 8x8 8x4 4x8 4x4/) {
    my $fn = "aom_highbd_${bd}_sub_pixel_variance${dims}";
    add_proto('uint32_t', $fn, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse");
    next if $dims eq '4x8' || $dims eq '4x4';
    specialize($fn, 'sse2');
  }
}
# High bit-depth sub-pixel averaging variance, registered for bit depths 12,
# 10 and 8 (in that order). The argument list ends with second_pred. Every
# size lists sse2 except 4x8 and 4x4, which only get a prototype.
foreach my $bd (12, 10, 8) {
  foreach my $dims (qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8 8x16 8x8 8x4 4x8 4x4/) {
    my $fn = "aom_highbd_${bd}_sub_pixel_avg_variance${dims}";
    add_proto('uint32_t', $fn, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred");
    next if $dims eq '4x8' || $dims eq '4x4';
    specialize($fn, 'sse2');
  }
}
# Masked compound prediction entry points.
# Each row is: [return type + function name], parameter list, ISA list.
# Rows are processed in order; specialize is only called when the ISA list
# is non-empty, so the two highbd entries are declared without it.
foreach my $decl (
  [ [qw/void aom_comp_mask_pred/],
    "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask",
    [qw/ssse3 avx2/] ],
  [ [qw/void aom_highbd_comp_mask_pred/],
    "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask",
    [] ],
  [ [qw/void aom_highbd_comp_mask_upsampled_pred/],
    "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref8, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask, int bd",
    [] ],
) {
  my ($signature, $params, $isas) = @$decl;
  add_proto(@$signature, $params);
  # The function name is the last word of the signature list.
  specialize($signature->[-1], @$isas) if @$isas;
}
1559 } # CONFIG_AV1_ENCODER