2 ; Copyright (c) 2014 The WebM project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
11 %include "third_party/x86inc/x86inc.asm"
; highbd_dc_predictor_4x4 -- declared with the named arguments dst, stride,
; above, left and goffset. Only a subset of the routine's instructions appears
; in this excerpt, so the notes below describe just the visible steps.
; NOTE(review): the "4, 5, 4" fields are presumably x86inc's argument count,
; general-register count and XMM-register count -- confirm against x86inc.asm.
21 cglobal highbd_dc_predictor_4x4
, 4, 5, 4, dst
, stride
, above
, left
, goffset
; Add the pw_4 constant to every 16-bit lane of m0.
; NOTE(review): looks like the rounding term of the DC average; the summation
; and the shift that would confirm this are not visible here.
31 paddw m0
, [GLOBAL(pw_4
)]
; 8-byte store of the low half of m0 at dst + stride*2.
; NOTE(review): the *2 scale suggests stride is counted in 16-bit samples --
; confirm against the C prototype.
35 movq
[dstq
+strideq
*2], m0
; Move dst forward by stride*4 bytes.
36 lea dstq
, [dstq
+strideq
*4]
; Same 8-byte store, now relative to the advanced dst.
38 movq
[dstq
+strideq
*2], m0
; highbd_dc_predictor_8x8 -- declared with the named arguments dst, stride,
; above, left and goffset. Only part of the body is visible in this excerpt;
; the visible part adds pw_8 to m0 and writes m0 to several dst rows.
; NOTE(review): the *2 scale applied to every stride term suggests stride is
; counted in 16-bit samples -- confirm against the C prototype.
44 cglobal highbd_dc_predictor_8x8
, 4, 5, 4, dst
, stride
, above
, left
, goffset
; Re-name the registers: from here on the names are dst, stride, stride3, one.
50 DEFINE_ARGS dst
, stride
, stride3
, one
; stride3 = stride * 3, so a single register addresses the third row offset.
52 lea stride3q
, [strideq
*3]
; Add the pw_8 constant to every 16-bit lane of m0.
; NOTE(review): presumably the rounding term of the DC average -- the sum and
; shift are not visible here.
61 paddw m0
, [GLOBAL(pw_8
)]
; Full-register stores of m0 at dst + stride*2, dst + stride*4 and
; dst + stride3*2.
66 mova
[dstq
+strideq
*2 ], m0
67 mova
[dstq
+strideq
*4 ], m0
68 mova
[dstq
+stride3q
*2], m0
; Move dst forward by stride*8 bytes, then repeat the same three stores.
69 lea dstq
, [dstq
+strideq
*8]
71 mova
[dstq
+strideq
*2 ], m0
72 mova
[dstq
+strideq
*4 ], m0
73 mova
[dstq
+stride3q
*2], m0
; highbd_dc_predictor_16x16 -- declared with the named arguments dst, stride,
; above, left and goffset. Only part of the body is visible in this excerpt;
; the visible part adds a constant to m0 and writes m0 twice per row (at byte
; offsets +0 and +16).
79 cglobal highbd_dc_predictor_16x16
, 4, 5, 5, dst
, stride
, above
, left
, goffset
; Re-name the registers: dst, stride, stride3, lines4.
; NOTE(review): lines4 is presumably a loop counter; its use is not visible.
87 DEFINE_ARGS dst
, stride
, stride3
, lines4
; stride3 = stride * 3.
88 lea stride3q
, [strideq
*3]
; Add the constant to each 32-bit lane of m0.
; NOTE(review): paddd is a dword add while the constant is named pw_16 --
; confirm that pw_16 is defined with dword elements.
101 paddd m0
, [GLOBAL(pw_16
)]
; For each of the row offsets stride*2, stride*4 and stride3*2, store m0 at
; byte offsets +0 and +16 within the row.
108 mova
[dstq
+strideq
*2 ], m0
109 mova
[dstq
+strideq
*2 +16], m0
110 mova
[dstq
+strideq
*4 ], m0
111 mova
[dstq
+strideq
*4 +16], m0
112 mova
[dstq
+stride3q
*2 ], m0
113 mova
[dstq
+stride3q
*2+16], m0
; Move dst forward by stride*8 bytes.
114 lea dstq
, [dstq
+strideq
*8]
; highbd_dc_predictor_32x32 -- declared with the named arguments dst, stride,
; above, left and goffset. Only part of the body is visible in this excerpt;
; the visible part adds a constant to m0 and writes m0 four times per row (at
; byte offsets +0, +16, +32 and +48).
123 cglobal highbd_dc_predictor_32x32
, 4, 5, 9, dst
, stride
, above
, left
, goffset
; Re-name the registers: dst, stride, stride3, lines4.
; NOTE(review): lines4 is presumably a loop counter; its use is not visible.
135 DEFINE_ARGS dst
, stride
, stride3
, lines4
; stride3 = stride * 3.
136 lea stride3q
, [strideq
*3]
; Add the constant to each 32-bit lane of m0.
; NOTE(review): paddd is a dword add while the constant is named pw_32 --
; confirm that pw_32 is defined with dword elements.
153 paddd m0
, [GLOBAL(pw_32
)]
; Row at dst + stride*2: four consecutive 16-byte stores of m0.
162 mova
[dstq
+strideq
*2 ], m0
163 mova
[dstq
+strideq
*2+16 ], m0
164 mova
[dstq
+strideq
*2+32 ], m0
165 mova
[dstq
+strideq
*2+48 ], m0
; Row at dst + stride*4: same four stores.
166 mova
[dstq
+strideq
*4 ], m0
167 mova
[dstq
+strideq
*4+16 ], m0
168 mova
[dstq
+strideq
*4+32 ], m0
169 mova
[dstq
+strideq
*4+48 ], m0
; Row at dst + stride3*2: same four stores.
170 mova
[dstq
+stride3q
*2 ], m0
171 mova
[dstq
+stride3q
*2 +16], m0
172 mova
[dstq
+stride3q
*2 +32], m0
173 mova
[dstq
+stride3q
*2 +48], m0
; Move dst forward by stride*8 bytes.
174 lea dstq
, [dstq
+strideq
*8]
; highbd_v_predictor_4x4 -- declared with the named arguments dst, stride and
; above. Only part of the body is visible in this excerpt; the load that fills
; m0 is not shown.
; NOTE(review): presumably m0 holds the row taken from above and is replicated
; into each dst row -- confirm against the full routine.
183 cglobal highbd_v_predictor_4x4
, 3, 3, 1, dst
, stride
, above
; 8-byte store of the low half of m0 at dst + stride*2.
186 movq
[dstq
+strideq
*2], m0
; Move dst forward by stride*4 bytes.
187 lea dstq
, [dstq
+strideq
*4]
; Same 8-byte store, now relative to the advanced dst.
189 movq
[dstq
+strideq
*2], m0
; highbd_v_predictor_8x8 -- declared with the named arguments dst, stride and
; above. Only part of the body is visible in this excerpt; the load that fills
; m0 is not shown.
; NOTE(review): presumably m0 holds the row taken from above -- confirm against
; the full routine.
193 cglobal highbd_v_predictor_8x8
, 3, 3, 1, dst
, stride
, above
; Re-name the registers: dst, stride, stride3 (the name above is dropped).
195 DEFINE_ARGS dst
, stride
, stride3
; stride3 = stride * 3.
196 lea stride3q
, [strideq
*3]
; Full-register stores of m0 at dst + stride*2, dst + stride*4 and
; dst + stride3*2.
198 mova
[dstq
+strideq
*2 ], m0
199 mova
[dstq
+strideq
*4 ], m0
200 mova
[dstq
+stride3q
*2], m0
; Move dst forward by stride*8 bytes, then repeat the same three stores.
201 lea dstq
, [dstq
+strideq
*8]
203 mova
[dstq
+strideq
*2 ], m0
204 mova
[dstq
+strideq
*4 ], m0
205 mova
[dstq
+stride3q
*2], m0
; highbd_v_predictor_16x16 -- declared with the named arguments dst, stride
; and above. Only part of the body is visible in this excerpt; the loads that
; fill m0 and m1 are not shown.
; NOTE(review): presumably m0/m1 hold the two 16-byte halves of the row taken
; from above -- confirm against the full routine.
209 cglobal highbd_v_predictor_16x16
, 3, 4, 2, dst
, stride
, above
; Re-name the registers: dst, stride, stride3, nlines4.
; NOTE(review): nlines4 is presumably a loop counter; its use is not visible.
212 DEFINE_ARGS dst
, stride
, stride3
, nlines4
; stride3 = stride * 3.
213 lea stride3q
, [strideq
*3]
; For each of the row offsets stride*2, stride*4 and stride3*2, store m0 at
; byte offset +0 and m1 at byte offset +16.
218 mova
[dstq
+strideq
*2 ], m0
219 mova
[dstq
+strideq
*2 +16], m1
220 mova
[dstq
+strideq
*4 ], m0
221 mova
[dstq
+strideq
*4 +16], m1
222 mova
[dstq
+stride3q
*2 ], m0
223 mova
[dstq
+stride3q
*2+16], m1
; Move dst forward by stride*8 bytes.
224 lea dstq
, [dstq
+strideq
*8]
; highbd_v_predictor_32x32 -- declared with the named arguments dst, stride
; and above. Only part of the body is visible in this excerpt; the loads that
; fill m0..m3 are not shown.
; NOTE(review): presumably m0..m3 hold the four 16-byte quarters of the row
; taken from above -- confirm against the full routine.
230 cglobal highbd_v_predictor_32x32
, 3, 4, 4, dst
, stride
, above
; Re-name the registers: dst, stride, stride3, nlines4.
; NOTE(review): nlines4 is presumably a loop counter; its use is not visible.
235 DEFINE_ARGS dst
, stride
, stride3
, nlines4
; stride3 = stride * 3.
236 lea stride3q
, [strideq
*3]
; Row at dst + stride*2: m0, m1, m2, m3 at byte offsets +0, +16, +32, +48.
243 mova
[dstq
+strideq
*2 ], m0
244 mova
[dstq
+strideq
*2 +16], m1
245 mova
[dstq
+strideq
*2 +32], m2
246 mova
[dstq
+strideq
*2 +48], m3
; Row at dst + stride*4: same four stores.
247 mova
[dstq
+strideq
*4 ], m0
248 mova
[dstq
+strideq
*4 +16], m1
249 mova
[dstq
+strideq
*4 +32], m2
250 mova
[dstq
+strideq
*4 +48], m3
; Row at dst + stride3*2: same four stores.
251 mova
[dstq
+stride3q
*2 ], m0
252 mova
[dstq
+stride3q
*2 +16], m1
253 mova
[dstq
+stride3q
*2 +32], m2
254 mova
[dstq
+stride3q
*2 +48], m3
; Move dst forward by stride*8 bytes.
255 lea dstq
, [dstq
+strideq
*8]
; highbd_tm_predictor_4x4 -- declared with the named arguments dst, stride,
; above, left and bps. Only part of the body is visible in this excerpt: the
; loads that fill m0, m1, m2 and m5 and the clamp instructions themselves are
; not shown, so the notes below cover just the visible steps.
261 cglobal highbd_tm_predictor_4x4
, 5, 5, 6, dst
, stride
, above
, left
, bps
; Copy the low 8 bytes of m0 into its high 8 bytes.
265 movlhps m0
, m0
; t1 t2 t3 t4 t1 t2 t3 t4
; Copy the low 8 bytes of m1 into its high 8 bytes.
266 movlhps m1
, m1
; tl tl tl tl tl tl tl tl
267 ; Get the values to compute the maximum value at this bit depth
; Per-lane 16-bit subtract: m0 = m0 - m1.
270 psubw m0
, m1
; t1-tl t2-tl t3-tl t4-tl
; m4 = 0 (used as the lower clamp bound per the original note below).
273 pxor m4
, m4
; min possible value
; m3 ^= m2.
; NOTE(review): the instructions that prepare m2 and m3 are not visible;
; the original note says the result is the upper clamp bound.
274 pxor m3
, m2
; max possible value
; Place the low 8 bytes of m5 into the high 8 bytes of m2.
278 movlhps m2
, m5
; l1 l1 l1 l1 l2 l2 l2 l2
280 ;Clamp to the bit-depth
; Store the high 8 bytes of m2 at dst + stride*2.
285 movhpd
[dstq
+strideq
*2], m2
; Move dst forward by stride*4 bytes.
286 lea dstq
, [dstq
+strideq
*4]
291 ;Clamp to the bit-depth
; Store the high 8 bytes of m2 at dst + stride*2 (relative to the advanced
; dst).
296 movhpd
[dstq
+strideq
*2], m2
; highbd_tm_predictor_8x8 -- declared with the names dst, stride, above, left,
; bps and one. Only part of the body is visible in this excerpt; the
; arithmetic between the loads and the store, and any loop control, are not
; shown.
300 cglobal highbd_tm_predictor_8x8
, 5, 6, 5, dst
, stride
, above
, left
, bps
, one
304 ; Get the values to compute the maximum value at this bit depth
; Re-name the registers: dst, stride, line, left.
311 DEFINE_ARGS dst
, stride
, line
, left
; Per-lane 16-bit subtract: m3 = m3 - m2.
; NOTE(review): the instructions that prepare m2 and m3 are not visible;
; the original note says the result is the upper clamp bound.
318 psubw m3
, m2
; max possible value
; m4 = 0 (lower clamp bound per the original note below).
319 pxor m4
, m4
; min possible value
; Load 4 bytes from left + line*4 into m1 and from left + line*4 + 2 into m2.
; NOTE(review): presumably two adjacent 16-bit left samples, i.e. two output
; rows per value of line -- confirm against the full loop.
322 movd m1
, [leftq
+lineq
*4]
323 movd m2
, [leftq
+lineq
*4+2]
330 ;Clamp to the bit-depth
; Full-register store of m2 at dst + stride*2.
337 mova
[dstq
+strideq
*2], m2
; Move dst forward by stride*4 bytes.
338 lea dstq
, [dstq
+strideq
*4]
; highbd_tm_predictor_16x16 -- declared with the names dst, stride, above,
; left, bps and one. Only part of the body is visible in this excerpt; the
; arithmetic between the loads and the stores, and any loop control, are not
; shown.
345 cglobal highbd_tm_predictor_16x16
, 5, 6, 9, dst
, stride
, above
, left
, bps
, one
350 ; Get the values to compute the maximum value at this bit depth
; Re-name the registers: dst, stride, line, left.
357 DEFINE_ARGS dst
, stride
, line
, left
; Per-lane 16-bit subtract: m7 = m7 - m5.
; NOTE(review): the instructions that prepare m5 and m7 are not visible;
; the original note says the result is the upper clamp bound.
364 psubw m7
, m5
; max possible value
; m8 = 0 (lower clamp bound per the original note below).
365 pxor m8
, m8
; min possible value
; Load 4 bytes from left + line*4 into m2 and from left + line*4 + 2 into m3.
; NOTE(review): presumably two adjacent 16-bit left samples, i.e. two output
; rows per value of line -- confirm against the full loop.
369 movd m2
, [leftq
+lineq
*4]
370 movd m3
, [leftq
+lineq
*4+2]
379 ;Clamp to the bit-depth
; Store m5 at byte offset +0 and m3 at byte offset +16 of the row at
; dst + stride*2.
390 mova
[dstq
+strideq
*2 ], m5
392 mova
[dstq
+strideq
*2+16], m3
; Move dst forward by stride*4 bytes.
393 lea dstq
, [dstq
+strideq
*4]
; highbd_tm_predictor_32x32 -- declared with the names dst, stride, above,
; left, bps and one. Only part of the body is visible in this excerpt (and the
; routine may continue past its end); the arithmetic between the loads and the
; stores, and any loop control, are not shown.
399 cglobal highbd_tm_predictor_32x32
, 5, 6, 12, dst
, stride
, above
, left
, bps
, one
406 ; Get the values to compute the maximum value at this bit depth
; Replicate word 0 of m10 across the four low 16-bit lanes of m10.
412 pshuflw m10
, m10
, 0x0
; Re-name the registers: dst, stride, line, left.
413 DEFINE_ARGS dst
, stride
, line
, left
; Per-lane 16-bit subtract: m10 = m10 - m5.
; NOTE(review): the instruction that prepares m5 is not visible; the original
; note says the result is the upper clamp bound.
420 psubw m10
, m5
; max possible value
; m11 = 0 (lower clamp bound per the original note below).
421 pxor m11
, m11
; min possible value
; Load 4 bytes from left + line*4 into m5 and from left + line*4 + 2 into m6.
; NOTE(review): presumably two adjacent 16-bit left samples, i.e. two output
; rows per value of line -- confirm against the full loop.
427 movd m5
, [leftq
+lineq
*4]
428 movd m6
, [leftq
+lineq
*4+2]
437 ;Clamp these values to the bit-depth
455 ;Clamp these values to the bit-depth
; Row at dst + stride*2: m7, m8, m9, m6 at byte offsets +0, +16, +32, +48.
465 mova
[dstq
+strideq
*2 ], m7
466 mova
[dstq
+strideq
*2+16], m8
467 mova
[dstq
+strideq
*2+32], m9
468 mova
[dstq
+strideq
*2+48], m6
; Move dst forward by stride*4 bytes.
469 lea dstq
, [dstq
+strideq
*4]