2 ; Copyright (c) 2014 The WebM project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
11 %include "third_party/x86inc/x86inc.asm"
; highbd_dc_predictor_4x4: x86inc entry point with named arguments dst, stride,
; above and left. The cglobal counts request 4 loaded arguments, 5 general
; purpose registers and 4 vector registers; the fifth register is named goffset
; (presumably the GOT base consumed by GLOBAL() - its setup is not visible here).
; NOTE(review): only part of the body is present in this excerpt; the loads,
; the reduction and the return are not shown.
21 cglobal highbd_dc_predictor_4x4
, 4, 5, 4, dst
, stride
, above
, left
, goffset
; Rename the registers: the third register (previously above) is now "one".
26 DEFINE_ARGS dst
, stride
, one
; Add the pw_4 constant to each 16-bit lane of m0 (presumably the rounding
; term applied before the averaging shift - TODO confirm, shift not visible).
35 paddw m0
, [GLOBAL(pw_4
)]
; Store the low 8 bytes of m0 at dst + 2*stride. Every address scales stride
; by 2, which suggests stride counts 16-bit samples - confirm against caller.
39 movq
[dstq
+strideq
*2], m0
; Advance dst by 4*stride bytes.
40 lea dstq
, [dstq
+strideq
*4]
; Same 8-byte store, relative to the advanced dst.
42 movq
[dstq
+strideq
*2], m0
; highbd_dc_predictor_8x8: x86inc entry point with named arguments dst, stride,
; above and left; 4 loaded arguments, 5 general purpose registers, 4 vector
; registers. The fifth register is named goffset (presumably the GOT base used
; by GLOBAL() - setup not visible here).
; NOTE(review): only part of the body is present in this excerpt.
48 cglobal highbd_dc_predictor_8x8
, 4, 5, 4, dst
, stride
, above
, left
, goffset
; Rename the registers: third is now stride3, fourth is now one.
54 DEFINE_ARGS dst
, stride
, stride3
, one
; stride3 = 3 * stride, so that stride3*2 below addresses 6*stride.
56 lea stride3q
, [strideq
*3]
; Add the pw_8 constant to each 16-bit lane of m0 (presumably the rounding
; term before the averaging shift - TODO confirm, shift not visible).
65 paddw m0
, [GLOBAL(pw_8
)]
; Store m0 at dst + 2*stride, dst + 4*stride and dst + 6*stride.
; The factor of 2 suggests stride counts 16-bit samples - confirm with caller.
70 mova
[dstq
+strideq
*2 ], m0
71 mova
[dstq
+strideq
*4 ], m0
72 mova
[dstq
+stride3q
*2], m0
; Advance dst by 8*stride bytes, then repeat the same three stores.
73 lea dstq
, [dstq
+strideq
*8]
75 mova
[dstq
+strideq
*2 ], m0
76 mova
[dstq
+strideq
*4 ], m0
77 mova
[dstq
+stride3q
*2], m0
; highbd_dc_predictor_16x16: x86inc entry point with named arguments dst,
; stride, above and left; 4 loaded arguments, 5 general purpose registers,
; 5 vector registers. The fifth register is named goffset (presumably the GOT
; base used by GLOBAL() - setup not visible here).
; NOTE(review): only part of the body is present in this excerpt; the loop
; label, counter handling and return are not shown.
83 cglobal highbd_dc_predictor_16x16
, 4, 5, 5, dst
, stride
, above
, left
, goffset
; Rename the registers: third is now stride3, fourth is now lines4
; (presumably a loop counter - its use is not visible here).
91 DEFINE_ARGS dst
, stride
, stride3
, lines4
; stride3 = 3 * stride, so that stride3*2 below addresses 6*stride.
92 lea stride3q
, [strideq
*3]
; Add the pw_16 constant to each 32-bit lane of m0.
; NOTE(review): this is a dword add (paddd) against a constant whose name
; carries the word prefix pw_; verify that pw_16 is defined as dwords.
105 paddd m0
, [GLOBAL(pw_16
)]
; Store m0 into both 16-byte halves (+0 and +16) of the locations at
; dst + 2*stride, dst + 4*stride and dst + 6*stride.
112 mova
[dstq
+strideq
*2 ], m0
113 mova
[dstq
+strideq
*2 +16], m0
114 mova
[dstq
+strideq
*4 ], m0
115 mova
[dstq
+strideq
*4 +16], m0
116 mova
[dstq
+stride3q
*2 ], m0
117 mova
[dstq
+stride3q
*2+16], m0
; Advance dst by 8*stride bytes.
118 lea dstq
, [dstq
+strideq
*8]
; highbd_dc_predictor_32x32: x86inc entry point with named arguments dst,
; stride, above and left; 4 loaded arguments, 5 general purpose registers,
; 9 vector registers. The fifth register is named goffset (presumably the GOT
; base used by GLOBAL() - setup not visible here).
; NOTE(review): only part of the body is present in this excerpt; the loop
; label, counter handling and return are not shown.
127 cglobal highbd_dc_predictor_32x32
, 4, 5, 9, dst
, stride
, above
, left
, goffset
; Rename the registers: third is now stride3, fourth is now lines4
; (presumably a loop counter - its use is not visible here).
139 DEFINE_ARGS dst
, stride
, stride3
, lines4
; stride3 = 3 * stride, so that stride3*2 below addresses 6*stride.
140 lea stride3q
, [strideq
*3]
; Add the pw_32 constant to each 32-bit lane of m0.
; NOTE(review): this is a dword add (paddd) against a constant whose name
; carries the word prefix pw_; verify that pw_32 is defined as dwords.
157 paddd m0
, [GLOBAL(pw_32
)]
; Store m0 into four consecutive 16-byte slots (+0, +16, +32, +48) at
; dst + 2*stride ...
166 mova
[dstq
+strideq
*2 ], m0
167 mova
[dstq
+strideq
*2+16 ], m0
168 mova
[dstq
+strideq
*2+32 ], m0
169 mova
[dstq
+strideq
*2+48 ], m0
; ... at dst + 4*stride ...
170 mova
[dstq
+strideq
*4 ], m0
171 mova
[dstq
+strideq
*4+16 ], m0
172 mova
[dstq
+strideq
*4+32 ], m0
173 mova
[dstq
+strideq
*4+48 ], m0
; ... and at dst + 6*stride (stride3*2).
174 mova
[dstq
+stride3q
*2 ], m0
175 mova
[dstq
+stride3q
*2 +16], m0
176 mova
[dstq
+stride3q
*2 +32], m0
177 mova
[dstq
+stride3q
*2 +48], m0
; Advance dst by 8*stride bytes.
178 lea dstq
, [dstq
+strideq
*8]
; highbd_v_predictor_4x4: x86inc entry point with named arguments dst, stride
; and above; 3 loaded arguments, 3 general purpose registers, 1 vector register.
; NOTE(review): only part of the body is present in this excerpt; the load
; that fills m0 and the return are not shown.
187 cglobal highbd_v_predictor_4x4
, 3, 3, 1, dst
, stride
, above
; Store the low 8 bytes of m0 at dst + 2*stride. The factor of 2 suggests
; stride counts 16-bit samples - confirm against caller.
190 movq
[dstq
+strideq
*2], m0
; Advance dst by 4*stride bytes.
191 lea dstq
, [dstq
+strideq
*4]
; Same 8-byte store, relative to the advanced dst.
193 movq
[dstq
+strideq
*2], m0
; highbd_v_predictor_8x8: x86inc entry point with named arguments dst, stride
; and above; 3 loaded arguments, 3 general purpose registers, 1 vector register.
; NOTE(review): only part of the body is present in this excerpt; the load
; that fills m0 and the return are not shown.
197 cglobal highbd_v_predictor_8x8
, 3, 3, 1, dst
, stride
, above
; Rename the registers: the third register (previously above) is now stride3.
199 DEFINE_ARGS dst
, stride
, stride3
; stride3 = 3 * stride, so that stride3*2 below addresses 6*stride.
200 lea stride3q
, [strideq
*3]
; Store m0 at dst + 2*stride, dst + 4*stride and dst + 6*stride.
202 mova
[dstq
+strideq
*2 ], m0
203 mova
[dstq
+strideq
*4 ], m0
204 mova
[dstq
+stride3q
*2], m0
; Advance dst by 8*stride bytes, then repeat the same three stores.
205 lea dstq
, [dstq
+strideq
*8]
207 mova
[dstq
+strideq
*2 ], m0
208 mova
[dstq
+strideq
*4 ], m0
209 mova
[dstq
+stride3q
*2], m0
; highbd_v_predictor_16x16: x86inc entry point with named arguments dst, stride
; and above; 3 loaded arguments, 4 general purpose registers, 2 vector
; registers.
; NOTE(review): only part of the body is present in this excerpt; the loads
; that fill m0/m1, the loop label and the return are not shown.
213 cglobal highbd_v_predictor_16x16
, 3, 4, 2, dst
, stride
, above
; Rename the registers: third is now stride3, fourth is now nlines4
; (presumably a loop counter - its use is not visible here).
216 DEFINE_ARGS dst
, stride
, stride3
, nlines4
; stride3 = 3 * stride, so that stride3*2 below addresses 6*stride.
217 lea stride3q
, [strideq
*3]
; For each of dst + 2*stride, dst + 4*stride and dst + 6*stride, store m0 at
; byte offset 0 and m1 at byte offset 16.
222 mova
[dstq
+strideq
*2 ], m0
223 mova
[dstq
+strideq
*2 +16], m1
224 mova
[dstq
+strideq
*4 ], m0
225 mova
[dstq
+strideq
*4 +16], m1
226 mova
[dstq
+stride3q
*2 ], m0
227 mova
[dstq
+stride3q
*2+16], m1
; Advance dst by 8*stride bytes.
228 lea dstq
, [dstq
+strideq
*8]
; highbd_v_predictor_32x32: x86inc entry point with named arguments dst, stride
; and above; 3 loaded arguments, 4 general purpose registers, 4 vector
; registers.
; NOTE(review): only part of the body is present in this excerpt; the loads
; that fill m0..m3, the loop label and the return are not shown.
234 cglobal highbd_v_predictor_32x32
, 3, 4, 4, dst
, stride
, above
; Rename the registers: third is now stride3, fourth is now nlines4
; (presumably a loop counter - its use is not visible here).
239 DEFINE_ARGS dst
, stride
, stride3
, nlines4
; stride3 = 3 * stride, so that stride3*2 below addresses 6*stride.
240 lea stride3q
, [strideq
*3]
; Store m0, m1, m2, m3 at byte offsets 0, 16, 32, 48 from dst + 2*stride ...
247 mova
[dstq
+strideq
*2 ], m0
248 mova
[dstq
+strideq
*2 +16], m1
249 mova
[dstq
+strideq
*2 +32], m2
250 mova
[dstq
+strideq
*2 +48], m3
; ... from dst + 4*stride ...
251 mova
[dstq
+strideq
*4 ], m0
252 mova
[dstq
+strideq
*4 +16], m1
253 mova
[dstq
+strideq
*4 +32], m2
254 mova
[dstq
+strideq
*4 +48], m3
; ... and from dst + 6*stride (stride3*2).
255 mova
[dstq
+stride3q
*2 ], m0
256 mova
[dstq
+stride3q
*2 +16], m1
257 mova
[dstq
+stride3q
*2 +32], m2
258 mova
[dstq
+stride3q
*2 +48], m3
; Advance dst by 8*stride bytes.
259 lea dstq
, [dstq
+strideq
*8]
; highbd_tm_predictor_4x4: x86inc entry point with named arguments dst, stride,
; above, left and bps; 5 loaded arguments, 6 general purpose registers,
; 5 vector registers. The sixth register is named one.
; NOTE(review): only part of the body is present in this excerpt; the above
; loads, the arithmetic, the clamp instructions, the loop label and the return
; are not shown.
265 cglobal highbd_tm_predictor_4x4
, 5, 6, 5, dst
, stride
, above
, left
, bps
, one
269 ; Get the values to compute the maximum value at this bit depth
; Rename the registers: the third register (previously above) is now line;
; left stays in the fourth position.
274 DEFINE_ARGS dst
, stride
, line
, left
; m3 -= m2 per 16-bit lane; per the original note this yields the upper clamp.
279 psubw m3
, m2
; max possible value
; m4 = 0, the lower clamp.
280 pxor m4
, m4
; min possible value
; Load 8 bytes from left + line*4 into m1 and 8 bytes from left + line*4 + 2
; into m2 (two starting points 2 bytes apart; presumably two consecutive
; 16-bit left samples - TODO confirm element size).
283 movq m1
, [leftq
+lineq
*4]
284 movq m2
, [leftq
+lineq
*4+2]
289 ;Clamp to the bit-depth
; Store the low 8 bytes of m2 at dst + 2*stride.
296 movq
[dstq
+strideq
*2], m2
; Advance dst by 4*stride bytes.
297 lea dstq
, [dstq
+strideq
*4]
; highbd_tm_predictor_8x8: x86inc entry point with named arguments dst, stride,
; above, left and bps; 5 loaded arguments, 6 general purpose registers,
; 5 vector registers. The sixth register is named one.
; NOTE(review): only part of the body is present in this excerpt; the above
; loads, the arithmetic, the clamp instructions, the loop label and the return
; are not shown.
303 cglobal highbd_tm_predictor_8x8
, 5, 6, 5, dst
, stride
, above
, left
, bps
, one
307 ; Get the values to compute the maximum value at this bit depth
; Rename the registers: the third register (previously above) is now line;
; left stays in the fourth position.
314 DEFINE_ARGS dst
, stride
, line
, left
; m3 -= m2 per 16-bit lane; per the original note this yields the upper clamp.
321 psubw m3
, m2
; max possible value
; m4 = 0, the lower clamp.
322 pxor m4
, m4
; min possible value
; Load 4 bytes from left + line*4 into m1 and 4 bytes from left + line*4 + 2
; into m2 (the two loads start 2 bytes apart; presumably consecutive 16-bit
; left samples - TODO confirm element size).
325 movd m1
, [leftq
+lineq
*4]
326 movd m2
, [leftq
+lineq
*4+2]
333 ;Clamp to the bit-depth
; Store m2 at dst + 2*stride.
340 mova
[dstq
+strideq
*2], m2
; Advance dst by 4*stride bytes.
341 lea dstq
, [dstq
+strideq
*4]
; highbd_tm_predictor_16x16: x86inc entry point with named arguments dst,
; stride, above, left and bps; 5 loaded arguments, 6 general purpose registers,
; 9 vector registers. The sixth register is named one.
; NOTE(review): only part of the body is present in this excerpt; the above
; loads, the arithmetic, the clamp instructions, the loop label and the return
; are not shown.
348 cglobal highbd_tm_predictor_16x16
, 5, 6, 9, dst
, stride
, above
, left
, bps
, one
353 ; Get the values to compute the maximum value at this bit depth
; Rename the registers: the third register (previously above) is now line;
; left stays in the fourth position.
360 DEFINE_ARGS dst
, stride
, line
, left
; m7 -= m5 per 16-bit lane; per the original note this yields the upper clamp.
367 psubw m7
, m5
; max possible value
; m8 = 0, the lower clamp.
368 pxor m8
, m8
; min possible value
; Load 4 bytes from left + line*4 into m2 and 4 bytes from left + line*4 + 2
; into m3 (the two loads start 2 bytes apart; presumably consecutive 16-bit
; left samples - TODO confirm element size).
372 movd m2
, [leftq
+lineq
*4]
373 movd m3
, [leftq
+lineq
*4+2]
382 ;Clamp to the bit-depth
; Store m5 at dst + 2*stride and m3 at dst + 2*stride + 16.
393 mova
[dstq
+strideq
*2 ], m5
395 mova
[dstq
+strideq
*2+16], m3
; Advance dst by 4*stride bytes.
396 lea dstq
, [dstq
+strideq
*4]
; highbd_tm_predictor_32x32: x86inc entry point with named arguments dst,
; stride, above, left and bps; 5 loaded arguments, 6 general purpose registers,
; 12 vector registers. The sixth register is named one.
; NOTE(review): only part of the body is present in this excerpt; the above
; loads, the arithmetic, the clamp instructions, the loop label and the return
; are not shown.
402 cglobal highbd_tm_predictor_32x32
, 5, 6, 12, dst
, stride
, above
, left
, bps
, one
409 ; Get the values to compute the maximum value at this bit depth
; Replicate the lowest 16-bit word of m10 across its low four words.
415 pshuflw m10
, m10
, 0x0
; Rename the registers: the third register (previously above) is now line;
; left stays in the fourth position.
416 DEFINE_ARGS dst
, stride
, line
, left
; m10 -= m5 per 16-bit lane; per the original note this yields the upper clamp.
423 psubw m10
, m5
; max possible value
; m11 = 0, the lower clamp.
424 pxor m11
, m11
; min possible value
; Load 4 bytes from left + line*4 into m5 and 4 bytes from left + line*4 + 2
; into m6 (the two loads start 2 bytes apart; presumably consecutive 16-bit
; left samples - TODO confirm element size).
430 movd m5
, [leftq
+lineq
*4]
431 movd m6
, [leftq
+lineq
*4+2]
440 ;Clamp these values to the bit-depth
458 ;Clamp these values to the bit-depth
; Store m7, m8, m9, m6 at byte offsets 0, 16, 32, 48 from dst + 2*stride.
468 mova
[dstq
+strideq
*2 ], m7
469 mova
[dstq
+strideq
*2+16], m8
470 mova
[dstq
+strideq
*2+32], m9
471 mova
[dstq
+strideq
*2+48], m6
; Advance dst by 4*stride bytes.
472 lea dstq
, [dstq
+strideq
*4]