Add sse2/ssse3 intra predictors for 8x32
[aom.git] / aom_dsp / aom_dsp_rtcd_defs.pl
blob954583827a1605bb0f1259898f7e2a7f3e553e20
1 ##
2 ## Copyright (c) 2017, Alliance for Open Media. All rights reserved
3 ##
4 ## This source code is subject to the terms of the BSD 2 Clause License and
5 ## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 ## was not distributed with this source code in the LICENSE file, you can
7 ## obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 ## Media Patent License 1.0 was not distributed with this source code in the
9 ## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
# Prints, via the here-document below, the text placed ahead of the generated
# declarations: a "DSP" banner and the #include lines the prototypes rely on.
# The sub is handed to forward_decls (defined outside this file) by name.
11 sub aom_dsp_forward_decls() {
12 print <<EOF
14 * DSP
17 #include "aom/aom_integer.h"
18 #include "aom_dsp/aom_dsp_common.h"
19 #include "av1/common/enums.h"
20 #include "av1/common/blockd.h"
22 EOF
24 forward_decls qw/aom_dsp_forward_decls/;
# Conditional ISA names used as extra arguments to specialize further down.
# Each variable holds either an ISA name or the empty string:
#   $avx2_ssse3  - 'avx2' only when both HAVE_AVX2 and HAVE_SSSE3 are "yes".
#   $*_x86_64    - the ISA name only when $opts{arch} is "x86_64".
26 # optimizations which depend on multiple features
27 $avx2_ssse3 = '';
28 if ((aom_config("HAVE_AVX2") eq "yes") && (aom_config("HAVE_SSSE3") eq "yes")) {
29 $avx2_ssse3 = 'avx2';
32 # functions that are 64 bit only.
33 $mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = $avx_x86_64 = $avx2_x86_64 = '';
34 if ($opts{arch} eq "x86_64") {
35 $mmx_x86_64 = 'mmx';
36 $sse2_x86_64 = 'sse2';
37 $ssse3_x86_64 = 'ssse3';
38 $avx_x86_64 = 'avx';
39 $avx2_x86_64 = 'avx2';
# Builds @block_sizes as a list of [width, height] pairs.
# The nested loops keep every pair whose aspect ratio is at most 2:1
# ($w <= 2*$h and $h <= 2*$w); the explicit pushes afterwards add the
# 4:1 and 1:4 shapes 4x16, 16x4, 8x32, 32x8, 16x64 and 64x16.
42 @block_widths = (4, 8, 16, 32, 64, 128);
44 @block_sizes = ();
45 foreach $w (@block_widths) {
46 foreach $h (@block_widths) {
47 push @block_sizes, [$w, $h] if ($w <= 2*$h && $h <= 2*$w) ;
50 push @block_sizes, [4, 16];
51 push @block_sizes, [16, 4];
52 push @block_sizes, [8, 32];
53 push @block_sizes, [32, 8];
54 push @block_sizes, [16, 64];
55 push @block_sizes, [64, 16];
# Builds @tx_sizes as a list of [width, height] pairs.
# Every dimension in @tx_dims contributes its square size (2x2 included).
# Rectangles are added only when both sides are >= 4: first the 2:1 / 1:2
# shapes, then the 4:1 / 1:4 shapes.
57 @tx_dims = (2, 4, 8, 16, 32, 64);
58 @tx_sizes = ();
59 foreach $w (@tx_dims) {
60 push @tx_sizes, [$w, $w];
61 foreach $h (@tx_dims) {
62 push @tx_sizes, [$w, $h] if ($w >=4 && $h >=4 && ($w == 2*$h || $h == 2*$w));
63 push @tx_sizes, [$w, $h] if ($w >=4 && $h >=4 && ($w == 4*$h || $h == 4*$w));
# For every size in @tx_sizes and every name in @pred_names, declares two
# prototypes: aom_<name>_predictor_<w>x<h> operating on uint8_t samples and
# aom_highbd_<name>_predictor_<w>x<h> operating on uint16_t samples with an
# extra bit-depth argument (bd).
67 @pred_names = qw/dc dc_top dc_left dc_128 v h paeth smooth smooth_v smooth_h/;
70 # Intra prediction
73 foreach (@tx_sizes) {
74 ($w, $h) = @$_;
75 foreach $pred_name (@pred_names) {
76 add_proto "void", "aom_${pred_name}_predictor_${w}x${h}",
77 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
78 add_proto "void", "aom_highbd_${pred_name}_predictor_${w}x${h}",
79 "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
# DC_TOP predictor: per-size list of instruction-set variants passed to
# specialize. sse2 is listed for every size; avx2 is added for 32- and
# 64-wide blocks; msa/neon appear only on the square 4x4..32x32 sizes.
83 specialize qw/aom_dc_top_predictor_4x4 msa neon sse2/;
84 specialize qw/aom_dc_top_predictor_4x8 sse2/;
85 specialize qw/aom_dc_top_predictor_4x16 sse2/;
86 specialize qw/aom_dc_top_predictor_8x4 sse2/;
87 specialize qw/aom_dc_top_predictor_8x8 neon msa sse2/;
88 specialize qw/aom_dc_top_predictor_8x16 sse2/;
89 specialize qw/aom_dc_top_predictor_8x32 sse2/;
90 specialize qw/aom_dc_top_predictor_16x8 sse2/;
91 specialize qw/aom_dc_top_predictor_16x16 neon msa sse2/;
92 specialize qw/aom_dc_top_predictor_16x32 sse2/;
93 specialize qw/aom_dc_top_predictor_16x64 sse2/;
94 specialize qw/aom_dc_top_predictor_32x16 sse2 avx2/;
95 specialize qw/aom_dc_top_predictor_32x32 msa neon sse2 avx2/;
96 specialize qw/aom_dc_top_predictor_32x64 sse2 avx2/;
97 specialize qw/aom_dc_top_predictor_64x64 sse2 avx2/;
98 specialize qw/aom_dc_top_predictor_64x32 sse2 avx2/;
99 specialize qw/aom_dc_top_predictor_64x16 sse2 avx2/;
# DC_LEFT predictor: same size/ISA layout as the DC_TOP list above it in the
# file (sse2 everywhere, avx2 for 32- and 64-wide blocks, msa/neon on the
# square 4x4..32x32 sizes).
100 specialize qw/aom_dc_left_predictor_4x4 msa neon sse2/;
101 specialize qw/aom_dc_left_predictor_4x8 sse2/;
102 specialize qw/aom_dc_left_predictor_4x16 sse2/;
103 specialize qw/aom_dc_left_predictor_8x4 sse2/;
104 specialize qw/aom_dc_left_predictor_8x8 neon msa sse2/;
105 specialize qw/aom_dc_left_predictor_8x16 sse2/;
106 specialize qw/aom_dc_left_predictor_8x32 sse2/;
107 specialize qw/aom_dc_left_predictor_16x8 sse2/;
108 specialize qw/aom_dc_left_predictor_16x16 neon msa sse2/;
109 specialize qw/aom_dc_left_predictor_16x32 sse2/;
110 specialize qw/aom_dc_left_predictor_16x64 sse2/;
111 specialize qw/aom_dc_left_predictor_32x16 sse2 avx2/;
112 specialize qw/aom_dc_left_predictor_32x32 msa neon sse2 avx2/;
113 specialize qw/aom_dc_left_predictor_32x64 sse2 avx2/;
114 specialize qw/aom_dc_left_predictor_64x64 sse2 avx2/;
115 specialize qw/aom_dc_left_predictor_64x32 sse2 avx2/;
116 specialize qw/aom_dc_left_predictor_64x16 sse2 avx2/;
# DC_128 predictor: sse2 for every listed size, avx2 added for 32- and
# 64-wide blocks, msa/neon only on the square 4x4..32x32 sizes.
117 specialize qw/aom_dc_128_predictor_4x4 msa neon sse2/;
118 specialize qw/aom_dc_128_predictor_4x8 sse2/;
119 specialize qw/aom_dc_128_predictor_4x16 sse2/;
120 specialize qw/aom_dc_128_predictor_8x4 sse2/;
121 specialize qw/aom_dc_128_predictor_8x8 neon msa sse2/;
122 specialize qw/aom_dc_128_predictor_8x16 sse2/;
123 specialize qw/aom_dc_128_predictor_8x32 sse2/;
124 specialize qw/aom_dc_128_predictor_16x8 sse2/;
125 specialize qw/aom_dc_128_predictor_16x16 neon msa sse2/;
126 specialize qw/aom_dc_128_predictor_16x32 sse2/;
127 specialize qw/aom_dc_128_predictor_16x64 sse2/;
128 specialize qw/aom_dc_128_predictor_32x16 sse2 avx2/;
129 specialize qw/aom_dc_128_predictor_32x32 msa neon sse2 avx2/;
130 specialize qw/aom_dc_128_predictor_32x64 sse2 avx2/;
131 specialize qw/aom_dc_128_predictor_64x64 sse2 avx2/;
132 specialize qw/aom_dc_128_predictor_64x32 sse2 avx2/;
133 specialize qw/aom_dc_128_predictor_64x16 sse2 avx2/;
# Vertical (V) predictor: sse2 for every listed size, avx2 added for 32- and
# 64-wide blocks, neon/msa only on the square 4x4..32x32 sizes.
134 specialize qw/aom_v_predictor_4x4 neon msa sse2/;
135 specialize qw/aom_v_predictor_4x8 sse2/;
136 specialize qw/aom_v_predictor_4x16 sse2/;
137 specialize qw/aom_v_predictor_8x4 sse2/;
138 specialize qw/aom_v_predictor_8x8 neon msa sse2/;
139 specialize qw/aom_v_predictor_8x16 sse2/;
140 specialize qw/aom_v_predictor_8x32 sse2/;
141 specialize qw/aom_v_predictor_16x8 sse2/;
142 specialize qw/aom_v_predictor_16x16 neon msa sse2/;
143 specialize qw/aom_v_predictor_16x32 sse2/;
144 specialize qw/aom_v_predictor_16x64 sse2/;
145 specialize qw/aom_v_predictor_32x16 sse2 avx2/;
146 specialize qw/aom_v_predictor_32x32 neon msa sse2 avx2/;
147 specialize qw/aom_v_predictor_32x64 sse2 avx2/;
148 specialize qw/aom_v_predictor_64x64 sse2 avx2/;
149 specialize qw/aom_v_predictor_64x32 sse2 avx2/;
150 specialize qw/aom_v_predictor_64x16 sse2 avx2/;
# Horizontal (H) predictor: sse2 for every listed size. Unlike the V and DC
# lists, avx2 is listed only for 32x32, and dspr2 appears on the square
# 4x4, 8x8 and 16x16 sizes. The 4x4 entry sits after 4x8 and 4x16, so the
# list is not strictly in size order.
151 specialize qw/aom_h_predictor_4x8 sse2/;
152 specialize qw/aom_h_predictor_4x16 sse2/;
153 specialize qw/aom_h_predictor_4x4 neon dspr2 msa sse2/;
154 specialize qw/aom_h_predictor_8x4 sse2/;
155 specialize qw/aom_h_predictor_8x8 neon dspr2 msa sse2/;
156 specialize qw/aom_h_predictor_8x16 sse2/;
157 specialize qw/aom_h_predictor_8x32 sse2/;
158 specialize qw/aom_h_predictor_16x8 sse2/;
159 specialize qw/aom_h_predictor_16x16 neon dspr2 msa sse2/;
160 specialize qw/aom_h_predictor_16x32 sse2/;
161 specialize qw/aom_h_predictor_16x64 sse2/;
162 specialize qw/aom_h_predictor_32x16 sse2/;
163 specialize qw/aom_h_predictor_32x32 neon msa sse2 avx2/;
164 specialize qw/aom_h_predictor_32x64 sse2/;
165 specialize qw/aom_h_predictor_64x64 sse2/;
166 specialize qw/aom_h_predictor_64x32 sse2/;
167 specialize qw/aom_h_predictor_64x16 sse2/;
# Paeth predictor: ssse3 for every listed size, plus avx2 for blocks that are
# 16, 32 or 64 wide. Each function is specialized exactly once here; a second
# ssse3-only entry for 16x8, 16x16, 16x32, 32x16 and 32x32 used to follow
# this list and contradicted the "ssse3 avx2" declarations, so it was dropped.
168 specialize qw/aom_paeth_predictor_4x4 ssse3/;
169 specialize qw/aom_paeth_predictor_4x8 ssse3/;
170 specialize qw/aom_paeth_predictor_4x16 ssse3/;
171 specialize qw/aom_paeth_predictor_8x4 ssse3/;
172 specialize qw/aom_paeth_predictor_8x8 ssse3/;
173 specialize qw/aom_paeth_predictor_8x16 ssse3/;
174 specialize qw/aom_paeth_predictor_8x32 ssse3/;
175 specialize qw/aom_paeth_predictor_16x8 ssse3 avx2/;
176 specialize qw/aom_paeth_predictor_16x16 ssse3 avx2/;
177 specialize qw/aom_paeth_predictor_16x32 ssse3 avx2/;
178 specialize qw/aom_paeth_predictor_16x64 ssse3 avx2/;
179 specialize qw/aom_paeth_predictor_32x16 ssse3 avx2/;
180 specialize qw/aom_paeth_predictor_32x32 ssse3 avx2/;
181 specialize qw/aom_paeth_predictor_32x64 ssse3 avx2/;
182 specialize qw/aom_paeth_predictor_64x32 ssse3 avx2/;
183 specialize qw/aom_paeth_predictor_64x64 ssse3 avx2/;
184 specialize qw/aom_paeth_predictor_64x16 ssse3 avx2/;
# Smooth predictor: ssse3 is the only instruction set listed, for every size
# below. Note that 16x4 and 32x8 are not listed here, although the
# smooth_v / smooth_h lists further down do include them.
190 specialize qw/aom_smooth_predictor_4x4 ssse3/;
191 specialize qw/aom_smooth_predictor_4x8 ssse3/;
192 specialize qw/aom_smooth_predictor_4x16 ssse3/;
193 specialize qw/aom_smooth_predictor_8x4 ssse3/;
194 specialize qw/aom_smooth_predictor_8x8 ssse3/;
195 specialize qw/aom_smooth_predictor_8x16 ssse3/;
196 specialize qw/aom_smooth_predictor_8x32 ssse3/;
197 specialize qw/aom_smooth_predictor_16x8 ssse3/;
198 specialize qw/aom_smooth_predictor_16x16 ssse3/;
199 specialize qw/aom_smooth_predictor_16x32 ssse3/;
200 specialize qw/aom_smooth_predictor_16x64 ssse3/;
201 specialize qw/aom_smooth_predictor_32x16 ssse3/;
202 specialize qw/aom_smooth_predictor_32x32 ssse3/;
203 specialize qw/aom_smooth_predictor_32x64 ssse3/;
204 specialize qw/aom_smooth_predictor_64x64 ssse3/;
205 specialize qw/aom_smooth_predictor_64x32 ssse3/;
206 specialize qw/aom_smooth_predictor_64x16 ssse3/;
# Smooth-vertical predictor: ssse3 only, for every size below, including the
# 4:1 shapes 16x4 and 32x8.
208 specialize qw/aom_smooth_v_predictor_4x4 ssse3/;
209 specialize qw/aom_smooth_v_predictor_4x8 ssse3/;
210 specialize qw/aom_smooth_v_predictor_4x16 ssse3/;
211 specialize qw/aom_smooth_v_predictor_8x4 ssse3/;
212 specialize qw/aom_smooth_v_predictor_8x8 ssse3/;
213 specialize qw/aom_smooth_v_predictor_8x16 ssse3/;
214 specialize qw/aom_smooth_v_predictor_8x32 ssse3/;
215 specialize qw/aom_smooth_v_predictor_16x4 ssse3/;
216 specialize qw/aom_smooth_v_predictor_16x8 ssse3/;
217 specialize qw/aom_smooth_v_predictor_16x16 ssse3/;
218 specialize qw/aom_smooth_v_predictor_16x32 ssse3/;
219 specialize qw/aom_smooth_v_predictor_16x64 ssse3/;
220 specialize qw/aom_smooth_v_predictor_32x8 ssse3/;
221 specialize qw/aom_smooth_v_predictor_32x16 ssse3/;
222 specialize qw/aom_smooth_v_predictor_32x32 ssse3/;
223 specialize qw/aom_smooth_v_predictor_32x64 ssse3/;
224 specialize qw/aom_smooth_v_predictor_64x64 ssse3/;
225 specialize qw/aom_smooth_v_predictor_64x32 ssse3/;
226 specialize qw/aom_smooth_v_predictor_64x16 ssse3/;
# Smooth-horizontal predictor: ssse3 only, covering the same sizes as the
# smooth-vertical list (16x4 and 32x8 included).
228 specialize qw/aom_smooth_h_predictor_4x4 ssse3/;
229 specialize qw/aom_smooth_h_predictor_4x8 ssse3/;
230 specialize qw/aom_smooth_h_predictor_4x16 ssse3/;
231 specialize qw/aom_smooth_h_predictor_8x4 ssse3/;
232 specialize qw/aom_smooth_h_predictor_8x8 ssse3/;
233 specialize qw/aom_smooth_h_predictor_8x16 ssse3/;
234 specialize qw/aom_smooth_h_predictor_8x32 ssse3/;
235 specialize qw/aom_smooth_h_predictor_16x4 ssse3/;
236 specialize qw/aom_smooth_h_predictor_16x8 ssse3/;
237 specialize qw/aom_smooth_h_predictor_16x16 ssse3/;
238 specialize qw/aom_smooth_h_predictor_16x32 ssse3/;
239 specialize qw/aom_smooth_h_predictor_16x64 ssse3/;
240 specialize qw/aom_smooth_h_predictor_32x8 ssse3/;
241 specialize qw/aom_smooth_h_predictor_32x16 ssse3/;
242 specialize qw/aom_smooth_h_predictor_32x32 ssse3/;
243 specialize qw/aom_smooth_h_predictor_32x64 ssse3/;
244 specialize qw/aom_smooth_h_predictor_64x64 ssse3/;
245 specialize qw/aom_smooth_h_predictor_64x32 ssse3/;
246 specialize qw/aom_smooth_h_predictor_64x16 ssse3/;
# DC predictor: sse2 for every listed size, avx2 added for 32- and 64-wide
# blocks; dspr2 appears on 4x4, 8x8 and 16x16, and msa/neon on the square
# 4x4..32x32 sizes.
248 # TODO(yunqingwang): optimize rectangular DC_PRED to replace division
249 # by multiply and shift.
250 specialize qw/aom_dc_predictor_4x4 dspr2 msa neon sse2/;
251 specialize qw/aom_dc_predictor_4x8 sse2/;
252 specialize qw/aom_dc_predictor_4x16 sse2/;
253 specialize qw/aom_dc_predictor_8x4 sse2/;
254 specialize qw/aom_dc_predictor_8x8 dspr2 neon msa sse2/;
255 specialize qw/aom_dc_predictor_8x16 sse2/;
256 specialize qw/aom_dc_predictor_8x32 sse2/;
257 specialize qw/aom_dc_predictor_16x8 sse2/;
258 specialize qw/aom_dc_predictor_16x16 dspr2 neon msa sse2/;
259 specialize qw/aom_dc_predictor_16x32 sse2/;
260 specialize qw/aom_dc_predictor_16x64 sse2/;
261 specialize qw/aom_dc_predictor_32x16 sse2 avx2/;
262 specialize qw/aom_dc_predictor_32x32 msa neon sse2 avx2/;
263 specialize qw/aom_dc_predictor_32x64 sse2 avx2/;
264 specialize qw/aom_dc_predictor_64x64 sse2 avx2/;
265 specialize qw/aom_dc_predictor_64x32 sse2 avx2/;
266 specialize qw/aom_dc_predictor_64x16 sse2 avx2/;
# High-bitdepth vertical predictor: sse2 only, for the square and 2:1 sizes
# from 4x4 up to 32x32. No 4:1 or 64-wide sizes are listed here.
268 specialize qw/aom_highbd_v_predictor_4x4 sse2/;
269 specialize qw/aom_highbd_v_predictor_4x8 sse2/;
270 specialize qw/aom_highbd_v_predictor_8x4 sse2/;
271 specialize qw/aom_highbd_v_predictor_8x8 sse2/;
272 specialize qw/aom_highbd_v_predictor_8x16 sse2/;
273 specialize qw/aom_highbd_v_predictor_16x8 sse2/;
274 specialize qw/aom_highbd_v_predictor_16x16 sse2/;
275 specialize qw/aom_highbd_v_predictor_16x32 sse2/;
276 specialize qw/aom_highbd_v_predictor_32x16 sse2/;
277 specialize qw/aom_highbd_v_predictor_32x32 sse2/;
# High-bitdepth DC predictor: sse2 only, for the square and 2:1 sizes from
# 4x4 up to 32x32. Every statement ends in a single semicolon (three entries
# previously carried a stray second one).
279 # TODO(yunqingwang): optimize rectangular DC_PRED to replace division
280 # by multiply and shift.
281 specialize qw/aom_highbd_dc_predictor_4x4 sse2/;
282 specialize qw/aom_highbd_dc_predictor_4x8 sse2/;
283 specialize qw/aom_highbd_dc_predictor_8x4 sse2/;
284 specialize qw/aom_highbd_dc_predictor_8x8 sse2/;
285 specialize qw/aom_highbd_dc_predictor_8x16 sse2/;
286 specialize qw/aom_highbd_dc_predictor_16x8 sse2/;
287 specialize qw/aom_highbd_dc_predictor_16x16 sse2/;
288 specialize qw/aom_highbd_dc_predictor_16x32 sse2/;
289 specialize qw/aom_highbd_dc_predictor_32x16 sse2/;
290 specialize qw/aom_highbd_dc_predictor_32x32 sse2/;
# High-bitdepth horizontal predictor: sse2 only, square and 2:1 sizes from
# 4x4 up to 32x32.
292 specialize qw/aom_highbd_h_predictor_4x4 sse2/;
293 specialize qw/aom_highbd_h_predictor_4x8 sse2/;
294 specialize qw/aom_highbd_h_predictor_8x4 sse2/;
295 specialize qw/aom_highbd_h_predictor_8x8 sse2/;
296 specialize qw/aom_highbd_h_predictor_8x16 sse2/;
297 specialize qw/aom_highbd_h_predictor_16x8 sse2/;
298 specialize qw/aom_highbd_h_predictor_16x16 sse2/;
299 specialize qw/aom_highbd_h_predictor_16x32 sse2/;
300 specialize qw/aom_highbd_h_predictor_32x16 sse2/;
301 specialize qw/aom_highbd_h_predictor_32x32 sse2/;
# High-bitdepth DC_LEFT / DC_TOP / DC_128 predictors: sse2 only, grouped in
# left/top/128 triples per size, for the same size set as above.
302 specialize qw/aom_highbd_dc_left_predictor_4x4 sse2/;
303 specialize qw/aom_highbd_dc_top_predictor_4x4 sse2/;
304 specialize qw/aom_highbd_dc_128_predictor_4x4 sse2/;
305 specialize qw/aom_highbd_dc_left_predictor_4x8 sse2/;
306 specialize qw/aom_highbd_dc_top_predictor_4x8 sse2/;
307 specialize qw/aom_highbd_dc_128_predictor_4x8 sse2/;
308 specialize qw/aom_highbd_dc_left_predictor_8x4 sse2/;
309 specialize qw/aom_highbd_dc_top_predictor_8x4 sse2/;
310 specialize qw/aom_highbd_dc_128_predictor_8x4 sse2/;
311 specialize qw/aom_highbd_dc_left_predictor_8x8 sse2/;
312 specialize qw/aom_highbd_dc_top_predictor_8x8 sse2/;
313 specialize qw/aom_highbd_dc_128_predictor_8x8 sse2/;
314 specialize qw/aom_highbd_dc_left_predictor_8x16 sse2/;
315 specialize qw/aom_highbd_dc_top_predictor_8x16 sse2/;
316 specialize qw/aom_highbd_dc_128_predictor_8x16 sse2/;
317 specialize qw/aom_highbd_dc_left_predictor_16x8 sse2/;
318 specialize qw/aom_highbd_dc_top_predictor_16x8 sse2/;
319 specialize qw/aom_highbd_dc_128_predictor_16x8 sse2/;
320 specialize qw/aom_highbd_dc_left_predictor_16x16 sse2/;
321 specialize qw/aom_highbd_dc_top_predictor_16x16 sse2/;
322 specialize qw/aom_highbd_dc_128_predictor_16x16 sse2/;
323 specialize qw/aom_highbd_dc_left_predictor_16x32 sse2/;
324 specialize qw/aom_highbd_dc_top_predictor_16x32 sse2/;
325 specialize qw/aom_highbd_dc_128_predictor_16x32 sse2/;
326 specialize qw/aom_highbd_dc_left_predictor_32x16 sse2/;
327 specialize qw/aom_highbd_dc_top_predictor_32x16 sse2/;
328 specialize qw/aom_highbd_dc_128_predictor_32x16 sse2/;
329 specialize qw/aom_highbd_dc_left_predictor_32x32 sse2/;
330 specialize qw/aom_highbd_dc_top_predictor_32x32 sse2/;
331 specialize qw/aom_highbd_dc_128_predictor_32x32 sse2/;
# Prototypes for the copy and 8-tap horizontal/vertical convolve functions.
# The low-bitdepth and high-bitdepth variants share one parameter list; the
# highbd ones take an additional trailing "int bps".
334 # Sub Pixel Filters
336 add_proto qw/void aom_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
337 add_proto qw/void aom_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
338 add_proto qw/void aom_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
340 specialize qw/aom_convolve_copy sse2 /;
# "$avx2_ssse3" is either 'avx2' or the empty string, so the avx2 variant is
# requested only when both AVX2 and SSSE3 are enabled in the configuration.
341 specialize qw/aom_convolve8_horiz sse2 ssse3/, "$avx2_ssse3";
342 specialize qw/aom_convolve8_vert sse2 ssse3/, "$avx2_ssse3";
344 # TODO(any): These need to be extended to up to 128x128 block sizes
# The neon/dspr2/msa variants are requested only when CONFIG_AV1 is not "yes".
345 if (!(aom_config("CONFIG_AV1") eq "yes")) {
346 specialize qw/aom_convolve_copy neon dspr2 msa/;
347 specialize qw/aom_convolve8_horiz neon dspr2 msa/;
348 specialize qw/aom_convolve8_vert neon dspr2 msa/;
351 add_proto qw/void aom_highbd_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
352 specialize qw/aom_highbd_convolve_copy sse2 avx2/;
354 add_proto qw/void aom_highbd_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
# "$sse2_x86_64" is 'sse2' only on x86_64 builds, otherwise the empty string.
355 specialize qw/aom_highbd_convolve8_horiz avx2/, "$sse2_x86_64";
357 add_proto qw/void aom_highbd_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
358 specialize qw/aom_highbd_convolve8_vert avx2/, "$sse2_x86_64";
# Low-bitdepth loop filter prototypes (vertical and horizontal; 4, 6, 8, 14
# variants). Single-edge functions take one blimit/limit/thresh triple; the
# 4- and 8-"dual" functions take two triples (suffix 0 and 1), while the
# 14-"dual" functions take a single triple. The 4_dual and 8_dual functions
# have no specialize line in this section.
361 # Loopfilter
363 add_proto qw/void aom_lpf_vertical_14/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
364 specialize qw/aom_lpf_vertical_14 sse2/;
366 add_proto qw/void aom_lpf_vertical_14_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
367 specialize qw/aom_lpf_vertical_14_dual sse2/;
369 add_proto qw/void aom_lpf_vertical_6/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
370 specialize qw/aom_lpf_vertical_6 sse2/;
372 add_proto qw/void aom_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
373 specialize qw/aom_lpf_vertical_8 sse2/;
375 add_proto qw/void aom_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
377 add_proto qw/void aom_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
378 specialize qw/aom_lpf_vertical_4 sse2/;
380 add_proto qw/void aom_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
382 add_proto qw/void aom_lpf_horizontal_14/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
383 specialize qw/aom_lpf_horizontal_14 sse2/;
385 add_proto qw/void aom_lpf_horizontal_14_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
386 specialize qw/aom_lpf_horizontal_14_dual sse2/;
388 add_proto qw/void aom_lpf_horizontal_6/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
389 specialize qw/aom_lpf_horizontal_6 sse2/;
391 add_proto qw/void aom_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
392 specialize qw/aom_lpf_horizontal_8 sse2/;
394 add_proto qw/void aom_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
396 add_proto qw/void aom_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
397 specialize qw/aom_lpf_horizontal_4 sse2/;
399 add_proto qw/void aom_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
# High-bitdepth loop filter prototypes. They mirror the low-bitdepth set but
# operate on "uint16_t *s" and take a trailing "int bd". Single-edge
# functions list sse2 only; every "_dual" function lists sse2 and avx2.
401 add_proto qw/void aom_highbd_lpf_vertical_14/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
402 specialize qw/aom_highbd_lpf_vertical_14 sse2/;
404 add_proto qw/void aom_highbd_lpf_vertical_14_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
405 specialize qw/aom_highbd_lpf_vertical_14_dual sse2 avx2/;
407 add_proto qw/void aom_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
408 specialize qw/aom_highbd_lpf_vertical_8 sse2/;
410 add_proto qw/void aom_highbd_lpf_vertical_6/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
411 specialize qw/aom_highbd_lpf_vertical_6 sse2/;
413 add_proto qw/void aom_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
414 specialize qw/aom_highbd_lpf_vertical_8_dual sse2 avx2/;
416 add_proto qw/void aom_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
417 specialize qw/aom_highbd_lpf_vertical_4 sse2/;
419 add_proto qw/void aom_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
420 specialize qw/aom_highbd_lpf_vertical_4_dual sse2 avx2/;
422 add_proto qw/void aom_highbd_lpf_horizontal_14/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
423 specialize qw/aom_highbd_lpf_horizontal_14 sse2/;
425 add_proto qw/void aom_highbd_lpf_horizontal_14_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
426 specialize qw/aom_highbd_lpf_horizontal_14_dual sse2 avx2/;
428 add_proto qw/void aom_highbd_lpf_horizontal_6/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
429 specialize qw/aom_highbd_lpf_horizontal_6 sse2/;
431 add_proto qw/void aom_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
432 specialize qw/aom_highbd_lpf_horizontal_8 sse2/;
434 add_proto qw/void aom_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
435 specialize qw/aom_highbd_lpf_horizontal_8_dual sse2 avx2/;
437 add_proto qw/void aom_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
438 specialize qw/aom_highbd_lpf_horizontal_4 sse2/;
440 add_proto qw/void aom_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
441 specialize qw/aom_highbd_lpf_horizontal_4_dual sse2 avx2/;
# av1_round_shift_array takes an int32_t array, its size and a bit count;
# sse4_1 is the only instruction set listed for it. Unlike most entries in
# this file, the function name is passed to specialize as a quoted string
# separate from the qw// ISA list.
443 # Helper functions.
444 add_proto qw/void av1_round_shift_array/, "int32_t *arr, int size, int bit";
445 specialize "av1_round_shift_array", qw/sse4_1/;
# Forward transforms are declared only when CONFIG_AV1_ENCODER is "yes";
# the inverse Walsh-Hadamard prototypes only when CONFIG_AV1 is "yes".
448 # Encoder functions.
452 # Forward transform
454 if (aom_config("CONFIG_AV1_ENCODER") eq "yes"){
455 add_proto qw/void aom_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
456 specialize qw/aom_fdct4x4 sse2/;
458 add_proto qw/void aom_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
459 specialize qw/aom_fdct4x4_1 sse2/;
461 add_proto qw/void aom_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
# "$ssse3_x86_64" is 'ssse3' only on x86_64 builds, otherwise empty.
462 specialize qw/aom_fdct8x8 sse2/, "$ssse3_x86_64";
464 # High bit depth
465 add_proto qw/void aom_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
466 specialize qw/aom_highbd_fdct4x4 sse2/;
468 add_proto qw/void aom_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
469 specialize qw/aom_highbd_fdct8x8 sse2/;
471 } # CONFIG_AV1_ENCODER
474 # Inverse transform
475 if (aom_config("CONFIG_AV1") eq "yes") {
# Neither iwht prototype has a specialize line in this section.
476 add_proto qw/void aom_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
478 add_proto qw/void aom_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
479 } # CONFIG_AV1
# Quantizer prototypes, declared only when CONFIG_AV1_ENCODER is "yes".
# The regular, 32x32 and 64x64 variants (low and high bitdepth) all share
# one parameter list. The 64x64 variants have no specialize line here.
482 # Quantization
484 if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
485 add_proto qw/void aom_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
486 specialize qw/aom_quantize_b sse2/, "$ssse3_x86_64", "$avx_x86_64";
488 add_proto qw/void aom_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
# Only the x86_64-conditional ssse3/avx names are passed for 32x32; on other
# architectures both strings are empty.
489 specialize qw/aom_quantize_b_32x32/, "$ssse3_x86_64", "$avx_x86_64";
491 add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
492 } # CONFIG_AV1_ENCODER
494 if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
495 add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
496 specialize qw/aom_highbd_quantize_b sse2 avx2/;
498 add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
499 specialize qw/aom_highbd_quantize_b_32x32 sse2/;
501 add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
503 } # CONFIG_AV1_ENCODER
# Mask-blend prototypes, declared only when CONFIG_AV1 is "yes".
# The two *_d16_mask functions blend CONV_BUF_TYPE sources and take a
# ConvolveParams pointer; they have no specialize line here. The remaining
# mask/hmask/vmask functions (low and high bitdepth) list sse4_1 only.
504 if (aom_config("CONFIG_AV1") eq "yes") {
506 # Alpha blending with mask
508 add_proto qw/void aom_lowbd_blend_a64_d16_mask/, "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx, ConvolveParams *conv_params";
509 add_proto qw/void aom_highbd_blend_a64_d16_mask/, "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx, ConvolveParams *conv_params, const int bd";
510 add_proto qw/void aom_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx";
511 add_proto qw/void aom_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w";
512 add_proto qw/void aom_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w";
513 specialize "aom_blend_a64_mask", qw/sse4_1/;
514 specialize "aom_blend_a64_hmask", qw/sse4_1/;
515 specialize "aom_blend_a64_vmask", qw/sse4_1/;
517 add_proto qw/void aom_highbd_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx, int bd";
518 add_proto qw/void aom_highbd_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w, int bd";
519 add_proto qw/void aom_highbd_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w, int bd";
520 specialize "aom_highbd_blend_a64_mask", qw/sse4_1/;
521 specialize "aom_highbd_blend_a64_hmask", qw/sse4_1/;
522 specialize "aom_highbd_blend_a64_vmask", qw/sse4_1/;
524 } # CONFIG_AV1
# Encoder-only helpers: block subtraction, sum of squares, min/max,
# Hadamard transforms, SATD, integer projections and vector variance.
# NOTE(review): the CONFIG_AV1_ENCODER guard is opened three times in this
# stretch but only one closing brace is visible in this view; confirm the
# guards are balanced (and not needlessly nested) in the complete file.
526 if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
528 # Block subtraction
530 add_proto qw/void aom_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
531 specialize qw/aom_subtract_block neon msa sse2/;
533 if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
535 # Sum of Squares
537 add_proto qw/uint64_t aom_sum_squares_2d_i16/, "const int16_t *src, int stride, int width, int height";
538 specialize qw/aom_sum_squares_2d_i16 sse2/;
540 add_proto qw/uint64_t aom_sum_squares_i16/, "const int16_t *src, uint32_t N";
541 specialize qw/aom_sum_squares_i16 sse2/;
546 # Avg
548 if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
550 # Avg
# NOTE(review): no add_proto for aom_avg_8x8 is visible anywhere in this
# view; confirm the prototype still exists, otherwise this line is stale.
552 specialize qw/aom_avg_8x8 sse2 neon msa/;
553 add_proto qw/void aom_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride, int bd";
554 specialize qw/aom_highbd_subtract_block sse2/;
557 # Minmax
559 add_proto qw/void aom_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
560 specialize qw/aom_minmax_8x8 sse2 neon/;
# The highbd min/max prototype has no specialize line.
561 add_proto qw/void aom_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
563 add_proto qw/void aom_hadamard_8x8/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
564 specialize qw/aom_hadamard_8x8 sse2 neon/, "$ssse3_x86_64";
566 add_proto qw/void aom_hadamard_16x16/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
567 specialize qw/aom_hadamard_16x16 sse2 neon/;
569 add_proto qw/int aom_satd/, "const int16_t *coeff, int length";
570 specialize qw/aom_satd sse2 neon/;
572 add_proto qw/void aom_int_pro_row/, "int16_t *hbuf, const uint8_t *ref, int ref_stride, int height";
573 specialize qw/aom_int_pro_row sse2 neon/;
575 add_proto qw/int16_t aom_int_pro_col/, "const uint8_t *ref, int width";
576 specialize qw/aom_int_pro_col sse2 neon/;
578 add_proto qw/int aom_vector_var/, "const int16_t *ref, const int16_t *src, int bwl";
579 specialize qw/aom_vector_var neon sse2/;
580 } # CONFIG_AV1_ENCODER
#
# Single block SAD / Single block Avg SAD
#
# Every entry of @block_sizes gets three prototypes: the plain SAD, the
# second_pred form (_avg) and the jnt form that also takes JNT_COMP_PARAMS.
foreach my $dims (@block_sizes) {
  ($w, $h) = @$dims;
  my $sad_args = "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
  my $sad_avg_args = "${sad_args}, const uint8_t *second_pred";
  add_proto qw/unsigned int/, "aom_sad${w}x${h}", $sad_args;
  add_proto qw/unsigned int/, "aom_sad${w}x${h}_avg", $sad_avg_args;
  add_proto qw/unsigned int/, "aom_jnt_sad${w}x${h}_avg", "${sad_avg_args}, const JNT_COMP_PARAMS *jcp_param";
}

# Registers SIMD variants. Each group is [ "isa list", sizes... ]; the
# function name is produced by substituting the size into $name_fmt.
my $sad_specialize = sub {
  my ($name_fmt, @groups) = @_;
  foreach my $group (@groups) {
    my ($isas, @sizes) = @$group;
    specialize(sprintf($name_fmt, $_), split(' ', $isas)) foreach @sizes;
  }
};

$sad_specialize->('aom_sad%s',
  [ 'avx2 sse2',          qw/128x128 128x64 64x128/ ],
  [ 'avx2 neon msa sse2', qw/64x64/ ],
  [ 'avx2 msa sse2',      qw/64x32 32x64/ ],
  [ 'avx2 neon msa sse2', qw/32x32/ ],
  [ 'avx2 msa sse2',      qw/32x16/ ],
  [ 'msa sse2',           qw/16x32/ ],
  [ 'neon msa sse2',      qw/16x16 16x8 8x16 8x8/ ],
  [ 'msa sse2',           qw/8x4 4x8/ ],
  [ 'neon msa sse2',      qw/4x4/ ],
);

$sad_specialize->('aom_sad%s_avg',
  [ 'avx2 sse2',     qw/128x128 128x64 64x128/ ],
  [ 'avx2 msa sse2', qw/64x64 64x32 32x64 32x32 32x16/ ],
  [ 'msa sse2',      qw/16x32 16x16 16x8 8x16 8x8 8x4 4x8 4x4/ ],
);

# The 1:4 and 4:1 sizes that are pushed onto @block_sizes separately.
my @sad_extended_sizes = qw/4x16 16x4 8x32 32x8 16x64 64x16/;
$sad_specialize->('aom_sad%s',     [ 'sse2', @sad_extended_sizes ]);
$sad_specialize->('aom_sad%s_avg', [ 'sse2', @sad_extended_sizes ]);

$sad_specialize->('aom_jnt_sad%s_avg',
  [ 'ssse3', qw/128x128 128x64 64x128 64x64 64x32 32x64 32x32 32x16/ ],
  [ 'ssse3', qw/16x32 16x16 16x8 8x16 8x8 8x4 4x8 4x4/ ],
  [ 'ssse3', @sad_extended_sizes ],
);
# Fixed-width SAD helpers (aom_sad<W>xh): one prototype per width, each with
# an SSE2 variant. Prototypes are all declared before any specialization.
my @sad_xh_widths = (4, 8, 16, 32, 64, 128);

foreach my $bw (@sad_xh_widths) {
  add_proto qw/unsigned int/, "aom_sad${bw}xh", "const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, int width, int height";
}

foreach my $bw (@sad_xh_widths) {
  specialize "aom_sad${bw}xh", qw/sse2/;
}
# High bit-depth single block SAD: plain, _avg and jnt _avg prototypes for
# every entry of @block_sizes.
foreach my $dims (@block_sizes) {
  ($w, $h) = @$dims;
  my $hbd_sad_args = "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
  add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}", $hbd_sad_args;
  add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}_avg", "${hbd_sad_args}, const uint8_t *second_pred";
  # SSE2 is registered for every size except those with a 128 dimension or
  # a width of 4.
  unless ($w == 128 || $h == 128 || $w == 4) {
    specialize "aom_highbd_sad${w}x${h}", qw/sse2/;
    specialize "aom_highbd_sad${w}x${h}_avg", qw/sse2/;
  }
  add_proto qw/unsigned int/, "aom_highbd_jnt_sad${w}x${h}_avg", "${hbd_sad_args}, const uint8_t *second_pred, const JNT_COMP_PARAMS* jcp_param";
}

# Explicit per-size variants; the plain and _avg functions share one table.
foreach my $suffix ('', '_avg') {
  foreach my $size (qw/128x128 128x64 64x128/) {
    specialize "aom_highbd_sad${size}${suffix}", qw/avx2/;
  }
  foreach my $size (qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8/) {
    specialize "aom_highbd_sad${size}${suffix}", qw/avx2 sse2/;
  }
  specialize "aom_highbd_sad8x4${suffix}", qw/sse2/;
}

# 1:4 and 4:1 sizes (4x16 is not listed here).
foreach my $suffix ('', '_avg') {
  foreach my $size (qw/16x4 8x32 32x8 16x64 64x16/) {
    specialize "aom_highbd_sad${size}${suffix}", qw/sse2/;
  }
}
#
# Masked SAD
#
# The 8-bit and high bit-depth families differ only in name stem and in the
# parameter names of the prototype; both get an SSSE3 variant for every
# entry of @block_sizes.
foreach my $family (
  [ 'aom_masked_sad',        "const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask" ],
  [ 'aom_highbd_masked_sad', "const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask" ],
) {
  my ($stem, $args) = @$family;
  foreach my $dims (@block_sizes) {
    ($w, $h) = @$dims;
    my $fn = "${stem}${w}x${h}";
    add_proto qw/unsigned int/, $fn, $args;
    specialize $fn, qw/ssse3/;
  }
}
#
# OBMC SAD
#
# 8-bit functions are declared first, then the high bit-depth ones; both
# share the same prototype.
foreach my $stem ('aom_obmc_sad', 'aom_highbd_obmc_sad') {
  foreach my $dims (@block_sizes) {
    ($w, $h) = @$dims;
    my $fn = "${stem}${w}x${h}";
    add_proto qw/unsigned int/, $fn, "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
    # 128x32 and 32x128 are excluded from the SSE4.1 specialization.
    next if ($w == 128 && $h == 32) || ($w == 32 && $h == 128);
    specialize $fn, qw/sse4_1/;
  }
}
#
# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
#
# Blocks of 3
my $sad_x3_args = "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";

# Square sizes: one prototype per entry of @block_widths.
foreach my $side (@block_widths) {
  add_proto qw/void/, "aom_sad${side}x${side}x3", $sad_x3_args;
}
specialize('aom_sad64x64x3', 'msa');
specialize('aom_sad32x32x3', 'msa');
specialize('aom_sad16x16x3', qw/sse3 ssse3 msa/);
specialize('aom_sad8x8x3',   qw/sse3 msa/);
specialize('aom_sad4x4x3',   qw/sse3 msa/);

# Rectangular sizes, each as [ name, ISA list ].
foreach my $rect (
  [ 'aom_sad16x8x3', qw/sse3 ssse3 msa/ ],
  [ 'aom_sad8x16x3', qw/sse3 msa/ ],
) {
  my ($fn, @isas) = @$rect;
  add_proto qw/void/, $fn, $sad_x3_args;
  specialize $fn, @isas;
}
# Blocks of 8
my $sad_x8_args = "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";

# Square sizes: one prototype per entry of @block_widths.
foreach my $side (@block_widths) {
  add_proto qw/void/, "aom_sad${side}x${side}x8", $sad_x8_args;
}
specialize('aom_sad64x64x8', 'msa');
specialize('aom_sad32x32x8', 'msa');
specialize('aom_sad16x16x8', qw/sse4_1 msa/);
specialize('aom_sad8x8x8',   qw/sse4_1 msa/);
specialize('aom_sad4x4x8',   qw/sse4_1 msa/);

# Rectangular sizes, each as [ name, ISA list ].
foreach my $rect (
  [ 'aom_sad16x8x8', qw/sse4_1 msa/ ],
  [ 'aom_sad8x16x8', qw/sse4_1 msa/ ],
  [ 'aom_sad8x4x8',  qw/msa/ ],
  [ 'aom_sad4x8x8',  qw/msa/ ],
) {
  my ($fn, @isas) = @$rect;
  add_proto qw/void/, $fn, $sad_x8_args;
  specialize $fn, @isas;
}
# High bit-depth multi-block SAD: prototypes only, no SIMD variants are
# registered in this section.
my $hbd_multi_sad_args = "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";

foreach my $side (@block_widths) {
  # Blocks of 3
  add_proto qw/void/, "aom_highbd_sad${side}x${side}x3", $hbd_multi_sad_args;
  # Blocks of 8
  add_proto qw/void/, "aom_highbd_sad${side}x${side}x8", $hbd_multi_sad_args;
}

# Blocks of 3
foreach my $fn (qw/aom_highbd_sad16x8x3 aom_highbd_sad8x16x3/) {
  add_proto qw/void/, $fn, $hbd_multi_sad_args;
}

# Blocks of 8
foreach my $fn (qw/aom_highbd_sad16x8x8 aom_highbd_sad8x16x8 aom_highbd_sad8x4x8 aom_highbd_sad4x8x8/) {
  add_proto qw/void/, $fn, $hbd_multi_sad_args;
}
#
# Multi-block SAD, comparing a reference to N independent blocks
#
foreach my $dims (@block_sizes) {
  ($w, $h) = @$dims;
  add_proto qw/void/, "aom_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
}

# SIMD variants, grouped as [ "isa list", sizes... ] in declaration order.
foreach my $group (
  [ 'avx2 sse2',          qw/128x128 128x64 64x128/ ],
  [ 'avx2 neon msa sse2', qw/64x64/ ],
  [ 'avx2 msa sse2',      qw/64x32 32x64/ ],
  [ 'avx2 neon msa sse2', qw/32x32/ ],
  [ 'msa sse2',           qw/32x16 16x32/ ],
  [ 'neon msa sse2',      qw/16x16/ ],
  [ 'msa sse2',           qw/16x8 8x16 8x8 8x4 4x8 4x4/ ],
  [ 'sse2',               qw/4x16 16x4 8x32 32x8 16x64 64x16/ ],
) {
  my ($isas, @sizes) = @$group;
  specialize("aom_sad${_}x4d", split(' ', $isas)) foreach @sizes;
}
#
# Multi-block SAD, comparing a reference to N independent blocks
#
foreach my $dims (@block_sizes) {
  ($w, $h) = @$dims;
  my $fn = "aom_highbd_sad${w}x${h}x4d";
  add_proto qw/void/, $fn, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
  # SSE2 is registered for every size without a 128 dimension.
  unless ($w == 128 || $h == 128) {
    specialize $fn, qw/sse2/;
  }
}

# Explicit per-size variants, grouped as [ "isa list", sizes... ].
foreach my $group (
  [ 'avx2',      qw/128x128 128x64 64x128/ ],
  [ 'sse2 avx2', qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8/ ],
  [ 'sse2',      qw/8x16 8x8 8x4 4x8 4x4/ ],
  [ 'sse2',      qw/4x16 16x4 8x32 32x8 16x64 64x16/ ],
) {
  my ($isas, @sizes) = @$group;
  specialize("aom_highbd_sad${_}x4d", split(' ', $isas)) foreach @sizes;
}
#
# Structured Similarity (SSIM)
#
if (aom_config("CONFIG_INTERNAL_STATS") eq "yes") {
  # The five accumulator outputs are common to all three prototypes.
  my $ssim_sums = "uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";

  # 8-bit forms: the SSE2 variant is only offered on x86_64
  # ($sse2_x86_64 is the empty string on other architectures).
  foreach my $fn (qw/aom_ssim_parms_8x8 aom_ssim_parms_16x16/) {
    add_proto 'void', $fn, "const uint8_t *s, int sp, const uint8_t *r, int rp, ${ssim_sums}";
    specialize $fn, "$sse2_x86_64";
  }

  # The high bit-depth form is declared without any SIMD variant.
  add_proto qw/void aom_highbd_ssim_parms_8x8/, "const uint16_t *s, int sp, const uint16_t *r, int rp, ${ssim_sums}";
}
899 } # CONFIG_AV1_ENCODER
901 if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
#
# Specialty Variance
#
my $get_var_args = "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
add_proto 'void', 'aom_get16x16var', $get_var_args;
add_proto 'void', 'aom_get8x8var', $get_var_args;

specialize('aom_get16x16var', qw/sse2 avx2 neon msa/);
specialize('aom_get8x8var', qw/sse2 neon msa/);

# MSE: all four prototypes are declared first, then specialized.
my @mse_variants = (
  [ 'aom_mse16x16', qw/sse2 avx2 neon msa/ ],
  [ 'aom_mse16x8',  qw/sse2 msa/ ],
  [ 'aom_mse8x16',  qw/sse2 msa/ ],
  [ 'aom_mse8x8',   qw/sse2 msa/ ],
);
foreach my $entry (@mse_variants) {
  add_proto qw/unsigned int/, $entry->[0], "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
}
foreach my $entry (@mse_variants) {
  my ($fn, @isas) = @$entry;
  specialize $fn, @isas;
}
# High bit-depth specialty variance / MSE, declared once per bit depth.
foreach my $depth (8, 10, 12) {
  my $stem = "aom_highbd_${depth}_";

  foreach my $size (qw/16x16 8x8/) {
    add_proto qw/void/, "${stem}get${size}var", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
  }

  foreach my $size (qw/16x16 16x8 8x16 8x8/) {
    add_proto qw/unsigned int/, "${stem}mse${size}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
  }

  # Only the square MSE sizes have an SSE2 variant.
  specialize "${stem}mse16x16", qw/sse2/;
  specialize "${stem}mse8x8", qw/sse2/;
}
# Upsampled prediction helpers. Each row is [ name, ISA, argument list ];
# every function returns void and has exactly one SIMD variant.
foreach my $decl (
  [ 'aom_upsampled_pred', 'sse2',
    "uint8_t *comp_pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref, int ref_stride" ],
  [ 'aom_comp_avg_upsampled_pred', 'sse2',
    "uint8_t *comp_pred, const uint8_t *pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref, int ref_stride" ],
  [ 'aom_jnt_comp_avg_upsampled_pred', 'ssse3',
    "uint8_t *comp_pred, const uint8_t *pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref, int ref_stride, const JNT_COMP_PARAMS *jcp_param" ],
  [ 'aom_highbd_upsampled_pred', 'sse2',
    "uint16_t *comp_pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref8, int ref_stride, int bd" ],
  [ 'aom_highbd_comp_avg_upsampled_pred', 'sse2',
    "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref8, int ref_stride, int bd" ],
  [ 'aom_highbd_jnt_comp_avg_upsampled_pred', 'sse2',
    "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref8, int ref_stride, int bd, const JNT_COMP_PARAMS *jcp_param" ],
) {
  my ($fn, $isa, $args) = @$decl;
  add_proto 'void', $fn, $args;
  specialize $fn, $isa;
}
# Both prototypes are declared before either is specialized.
add_proto(qw/unsigned int aom_get_mb_ss/, "const int16_t *");
add_proto(qw/unsigned int aom_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride");

specialize('aom_get_mb_ss', qw/sse2 msa/);
specialize('aom_get4x4sse_cs', qw/neon msa/);
#
# Variance / Subpixel Variance / Subpixel Avg Variance
#
my $var_args = "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
my $subpel_var_args = "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";

# Sub-4x4 sizes are declared by hand; they are not part of @block_sizes.
foreach my $tiny (qw/2x2 2x4 4x2/) {
  add_proto qw/unsigned int/, "aom_variance${tiny}", $var_args;
}

# Four prototypes for every entry of @block_sizes.
foreach my $dims (@block_sizes) {
  ($w, $h) = @$dims;
  my $size = "${w}x${h}";
  add_proto qw/unsigned int/, "aom_variance${size}", $var_args;
  add_proto qw/uint32_t/, "aom_sub_pixel_variance${size}", $subpel_var_args;
  add_proto qw/uint32_t/, "aom_sub_pixel_avg_variance${size}", "${subpel_var_args}, const uint8_t *second_pred";
  add_proto qw/uint32_t/, "aom_jnt_sub_pixel_avg_variance${size}", "${subpel_var_args}, const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param";
}

# Registers SIMD variants. Each group is [ "isa list", sizes... ]; the
# function name is $stem followed by the size.
my $var_specialize = sub {
  my ($stem, @groups) = @_;
  foreach my $group (@groups) {
    my ($isas, @sizes) = @$group;
    specialize("${stem}${_}", split(' ', $isas)) foreach @sizes;
  }
};

$var_specialize->('aom_variance',
  [ 'sse2 avx2',          qw/128x128 128x64 64x128/ ],
  [ 'sse2 avx2 neon msa', qw/64x64 64x32/ ],
  [ 'sse2 neon msa',      qw/32x64/ ],
  [ 'sse2 avx2 neon msa', qw/32x32/ ],
  [ 'sse2 avx2 msa',      qw/32x16/ ],
  [ 'sse2 msa',           qw/16x32/ ],
  [ 'sse2 avx2 neon msa', qw/16x16/ ],
  [ 'sse2 neon msa',      qw/16x8 8x16 8x8/ ],
  [ 'sse2 msa',           qw/8x4 4x8 4x4/ ],
);

$var_specialize->('aom_sub_pixel_variance',
  [ 'avx2 neon msa sse2 ssse3', qw/64x64/ ],
  [ 'msa sse2 ssse3',           qw/64x32 32x64/ ],
  [ 'avx2 neon msa sse2 ssse3', qw/32x32/ ],
  [ 'msa sse2 ssse3',           qw/32x16 16x32/ ],
  [ 'neon msa sse2 ssse3',      qw/16x16/ ],
  [ 'msa sse2 ssse3',           qw/16x8 8x16/ ],
  [ 'neon msa sse2 ssse3',      qw/8x8/ ],
  [ 'msa sse2 ssse3',           qw/8x4 4x8 4x4/ ],
);

$var_specialize->('aom_sub_pixel_avg_variance',
  [ 'avx2 msa sse2 ssse3', qw/64x64/ ],
  [ 'msa sse2 ssse3',      qw/64x32 32x64/ ],
  [ 'avx2 msa sse2 ssse3', qw/32x32/ ],
  [ 'msa sse2 ssse3',      qw/32x16 16x32 16x16 16x8 8x16 8x8 8x4 4x8 4x4/ ],
);

# The 1:4 and 4:1 sizes that are pushed onto @block_sizes separately.
my @var_extended_sizes = qw/4x16 16x4 8x32 32x8 16x64 64x16/;
$var_specialize->('aom_variance',               [ 'sse2',       @var_extended_sizes ]);
$var_specialize->('aom_sub_pixel_variance',     [ 'sse2 ssse3', @var_extended_sizes ]);
$var_specialize->('aom_sub_pixel_avg_variance', [ 'sse2 ssse3', @var_extended_sizes ]);

# The jnt form has an SSSE3 variant for every size.
$var_specialize->('aom_jnt_sub_pixel_avg_variance',
  [ 'ssse3', qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8 8x16 8x8 8x4 4x8 4x4/ ],
  [ 'ssse3', @var_extended_sizes ],
  [ 'ssse3', qw/128x128 128x64 64x128/ ],
);
# High bit-depth variance / subpixel variance, declared once per bit depth.
foreach my $depth (8, 10, 12) {
  my $stem = "aom_highbd_${depth}_";
  my $hbd_var_args = "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
  my $hbd_subpel_args = "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";

  # Sub-4x4 sizes are declared by hand; they are not part of @block_sizes.
  foreach my $tiny (qw/2x2 2x4 4x2/) {
    add_proto qw/unsigned int/, "${stem}variance${tiny}", "${hbd_var_args}, unsigned int *sse";
  }

  foreach my $dims (@block_sizes) {
    ($w, $h) = @$dims;
    my $size = "${w}x${h}";
    add_proto qw/unsigned int/, "${stem}variance${size}", "${hbd_var_args}, uint32_t *sse";
    add_proto qw/uint32_t/, "${stem}sub_pixel_variance${size}", $hbd_subpel_args;
    add_proto qw/uint32_t/, "${stem}sub_pixel_avg_variance${size}", "${hbd_subpel_args}, const uint8_t *second_pred";

    my $has_128 = ($w == 128 || $h == 128);
    my $is_4x4  = ($w == 4 && $h == 4);

    # Plain variance: SSE2 unless a dimension is 128 or 4.
    if (!$has_128 && $w != 4 && $h != 4) {
      specialize "${stem}variance${size}", "sse2";
    }
    # TODO(david.barker): When ext-partition-types is enabled, we currently
    # don't have vectorized 4x16 highbd variance functions
    if ($is_4x4) {
      specialize "${stem}variance${size}", "sse4_1";
    }

    # Subpixel forms: SSE2 unless a dimension is 128 or the width is 4.
    if (!$has_128 && $w != 4) {
      specialize "${stem}sub_pixel_variance${size}", qw/sse2/;
      specialize "${stem}sub_pixel_avg_variance${size}", qw/sse2/;
    }
    if ($is_4x4) {
      specialize "${stem}sub_pixel_variance${size}", "sse4_1";
      specialize "${stem}sub_pixel_avg_variance${size}", "sse4_1";
    }

    # The jnt form is declared without any SIMD variant.
    add_proto qw/uint32_t/, "${stem}jnt_sub_pixel_avg_variance${size}", "${hbd_subpel_args}, const uint8_t *second_pred, const JNT_COMP_PARAMS* jcp_param";
  }
}
#
# Masked Variance / Masked Subpixel Variance
#
# The 8-bit family comes first, followed by the 8/10/12-bit high bit-depth
# families; all share one prototype and an SSSE3 variant.
foreach my $stem ('aom_', 'aom_highbd_8_', 'aom_highbd_10_', 'aom_highbd_12_') {
  foreach my $dims (@block_sizes) {
    ($w, $h) = @$dims;
    my $fn = "${stem}masked_sub_pixel_variance${w}x${h}";
    add_proto qw/unsigned int/, $fn, "const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse";
    specialize $fn, qw/ssse3/;
  }
}
#
# OBMC Variance / OBMC Subpixel Variance
#
# The 8-bit family comes first, then the high bit-depth families. Note that
# the first high bit-depth stem carries no bit-depth number.
foreach my $stem ('aom_', 'aom_highbd_', 'aom_highbd_10_', 'aom_highbd_12_') {
  foreach my $dims (@block_sizes) {
    ($w, $h) = @$dims;
    my $size = "${w}x${h}";
    add_proto qw/unsigned int/, "${stem}obmc_variance${size}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
    add_proto qw/unsigned int/, "${stem}obmc_sub_pixel_variance${size}", "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
    # Only the plain variance has an SSE4.1 variant; the subpixel form has none.
    specialize "${stem}obmc_variance${size}", 'sse4_1';
  }
}
# Explicit aom_sub_pixel_avg_variance declarations, one [ size, ISAs... ]
# row per function.
# NOTE(review): the same prototypes are already produced by the
# @block_sizes loop in the Variance section, and the same ISA lists are
# already registered there — confirm whether this repetition is still needed.
foreach my $entry (
  [ '64x64', qw/avx2 msa sse2 ssse3/ ],
  [ '64x32', qw/msa sse2 ssse3/ ],
  [ '32x64', qw/msa sse2 ssse3/ ],
  [ '32x32', qw/avx2 msa sse2 ssse3/ ],
  [ '32x16', qw/msa sse2 ssse3/ ],
  [ '16x32', qw/msa sse2 ssse3/ ],
  [ '16x16', qw/msa sse2 ssse3/ ],
  [ '16x8',  qw/msa sse2 ssse3/ ],
  [ '8x16',  qw/msa sse2 ssse3/ ],
  [ '8x8',   qw/msa sse2 ssse3/ ],
  [ '8x4',   qw/msa sse2 ssse3/ ],
  [ '4x8',   qw/msa sse2 ssse3/ ],
  [ '4x4',   qw/msa sse2 ssse3/ ],
) {
  my ($size, @isas) = @$entry;
  my $fn = "aom_sub_pixel_avg_variance${size}";
  add_proto 'uint32_t', $fn, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
  specialize $fn, @isas;
}
1187 # Specialty Subpixel
# Half-pixel 16x16 variance entry points. The _h, _v and _hv variants share
# one signature and each lists sse2 as its only specialization.
foreach my $dir (qw/h v hv/) {
  add_proto qw/uint32_t/, "aom_variance_halfpixvar16x16_${dir}", "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
  specialize "aom_variance_halfpixvar16x16_${dir}", qw/sse2/;
}
1199 # Comp Avg
# Compound-average predictors. The plain variant has no specialization listed;
# the jnt variant takes an extra JNT_COMP_PARAMS pointer and lists ssse3.
add_proto 'void', 'aom_comp_avg_pred', "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";

add_proto 'void', 'aom_jnt_comp_avg_pred', "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride, const JNT_COMP_PARAMS *jcp_param";
specialize 'aom_jnt_comp_avg_pred', 'ssse3';
# 12-bit high-bitdepth block variance, largest block first.
# 64x64 through 8x8 list sse2; 8x4, 4x8 and 4x4 get a prototype only.
foreach my $blk (qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8 8x16 8x8 8x4 4x8 4x4/) {
  add_proto qw/unsigned int/, "aom_highbd_12_variance${blk}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
  next if $blk eq '8x4' || $blk eq '4x8' || $blk eq '4x4';
  specialize "aom_highbd_12_variance${blk}", qw/sse2/;
}
# 10-bit high-bitdepth block variance, largest block first.
# 64x64 through 8x8 list sse2; 8x4, 4x8 and 4x4 get a prototype only.
foreach my $blk (qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8 8x16 8x8 8x4 4x8 4x4/) {
  add_proto qw/unsigned int/, "aom_highbd_10_variance${blk}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
  next if $blk eq '8x4' || $blk eq '4x8' || $blk eq '4x4';
  specialize "aom_highbd_10_variance${blk}", qw/sse2/;
}
# 8-bit high-bitdepth block variance, largest block first.
# 64x64 through 8x8 list sse2; 8x4, 4x8 and 4x4 get a prototype only.
foreach my $blk (qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8 8x16 8x8 8x4 4x8 4x4/) {
  add_proto qw/unsigned int/, "aom_highbd_8_variance${blk}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
  next if $blk eq '8x4' || $blk eq '4x8' || $blk eq '4x4';
  specialize "aom_highbd_8_variance${blk}", qw/sse2/;
}
# High-bitdepth "get var" helpers that report both sse and sum through output
# pointers. Declared for bit depths 8, 10 and 12 at 16x16 and 8x8; no
# specialization is listed for any of them.
foreach my $bd (qw/8 10 12/) {
  foreach my $blk (qw/16x16 8x8/) {
    add_proto qw/void/, "aom_highbd_${bd}_get${blk}var", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
  }
}
# High-bitdepth MSE prototypes for bit depths 8, 10 and 12.
# Only the square sizes (16x16 and 8x8) list sse2; 16x8 and 8x16 get a
# prototype only.
foreach my $bd (qw/8 10 12/) {
  foreach my $blk (qw/16x16 16x8 8x16 8x8/) {
    add_proto qw/unsigned int/, "aom_highbd_${bd}_mse${blk}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
    specialize "aom_highbd_${bd}_mse${blk}", qw/sse2/ if $blk eq '16x16' || $blk eq '8x8';
  }
}
# High-bitdepth compound-average predictors: comp_pred is uint16_t, while the
# pred8/ref8 inputs are passed as uint8_t pointers.
add_proto 'void', 'aom_highbd_comp_avg_pred', "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride";

add_proto 'void', 'aom_highbd_jnt_comp_avg_pred', "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride, const JNT_COMP_PARAMS *jcp_param";
# NOTE(review): 'c' is the only variant listed here, unlike the sibling entries
# that name an instruction set -- confirm this is intentional.
specialize 'aom_highbd_jnt_comp_avg_pred', 'c';
1348 # Subpixel Variance
# 12-bit high-bitdepth sub-pixel variance, one prototype per block size.
# 64x64 through 8x4 list sse2; 4x8 and 4x4 get a prototype only.
foreach my $blk (qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8 8x16 8x8 8x4 4x8 4x4/) {
  add_proto qw/uint32_t/, "aom_highbd_12_sub_pixel_variance${blk}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
  next if $blk eq '4x8' || $blk eq '4x4';
  specialize "aom_highbd_12_sub_pixel_variance${blk}", qw/sse2/;
}
# 10-bit high-bitdepth sub-pixel variance, one prototype per block size.
# 64x64 through 8x4 list sse2; 4x8 and 4x4 get a prototype only.
foreach my $blk (qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8 8x16 8x8 8x4 4x8 4x4/) {
  add_proto qw/uint32_t/, "aom_highbd_10_sub_pixel_variance${blk}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
  next if $blk eq '4x8' || $blk eq '4x4';
  specialize "aom_highbd_10_sub_pixel_variance${blk}", qw/sse2/;
}
# 8-bit high-bitdepth sub-pixel variance, one prototype per block size.
# 64x64 through 8x4 list sse2; 4x8 and 4x4 get a prototype only.
foreach my $blk (qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8 8x16 8x8 8x4 4x8 4x4/) {
  add_proto qw/uint32_t/, "aom_highbd_8_sub_pixel_variance${blk}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
  next if $blk eq '4x8' || $blk eq '4x4';
  specialize "aom_highbd_8_sub_pixel_variance${blk}", qw/sse2/;
}
# 12-bit high-bitdepth sub-pixel average variance (takes a second_pred pointer
# in addition to the sub-pixel variance arguments).
# 64x64 through 8x4 list sse2; 4x8 and 4x4 get a prototype only.
foreach my $blk (qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8 8x16 8x8 8x4 4x8 4x4/) {
  add_proto qw/uint32_t/, "aom_highbd_12_sub_pixel_avg_variance${blk}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
  next if $blk eq '4x8' || $blk eq '4x4';
  specialize "aom_highbd_12_sub_pixel_avg_variance${blk}", qw/sse2/;
}
# 10-bit high-bitdepth sub-pixel average variance (takes a second_pred pointer
# in addition to the sub-pixel variance arguments).
# 64x64 through 8x4 list sse2; 4x8 and 4x4 get a prototype only.
foreach my $blk (qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8 8x16 8x8 8x4 4x8 4x4/) {
  add_proto qw/uint32_t/, "aom_highbd_10_sub_pixel_avg_variance${blk}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
  next if $blk eq '4x8' || $blk eq '4x4';
  specialize "aom_highbd_10_sub_pixel_avg_variance${blk}", qw/sse2/;
}
# aom_highbd_8_sub_pixel_avg_variance{W}x{H}: one prototype per block size,
# all sharing a single uint32_t-returning signature. Prototypes are registered
# in the order listed below. Every size except 4x8 and 4x4 also registers an
# sse2 specialization immediately after its prototype.
{
  my $avg_variance_args = "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";

  # Sizes that are declared but get no specialize call.
  my %proto_only = map { $_ => 1 } qw/4x8 4x4/;

  foreach my $dims (qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8 8x16 8x8 8x4 4x8 4x4/) {
    my $fn_name = "aom_highbd_8_sub_pixel_avg_variance$dims";
    add_proto('uint32_t', $fn_name, $avg_variance_args);
    specialize($fn_name, 'sse2') unless $proto_only{$dims};
  }
}
# Masked compound prediction: writes into comp_pred from pred and ref using
# mask; invert_mask is passed through as a flag. ssse3 and avx2
# specializations are registered for this prototype.
add_proto(
  'void', 'aom_comp_mask_pred',
  "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask"
);
specialize('aom_comp_mask_pred', 'ssse3', 'avx2');

# Variants with a uint16_t destination buffer. Neither has a specialize call
# here, so only the prototypes are registered.
add_proto(
  'void', 'aom_highbd_comp_mask_pred',
  "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask"
);

# Same as above with extra subsample_x_q3 / subsample_y_q3 and bd arguments.
add_proto(
  'void', 'aom_highbd_comp_mask_upsampled_pred',
  "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref8, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask, int bd"
);
1575 } # CONFIG_AV1_ENCODER