* SDL compilation fix for FreeBSD.
[vlc.git] / plugins / motion / vdec_motion_inner_mmxext.c
blob2d6df9ffc3f63a1f83049650fc5e7215d1713612
1 /*****************************************************************************
2 * vdec_motion_inner_mmxext.c : motion compensation inner routines optimized
3 * in MMX EXT
4 *****************************************************************************
5 * Copyright (C) 1999, 2000 VideoLAN
6 * $Id: vdec_motion_inner_mmxext.c,v 1.3 2001/06/07 22:14:55 sam Exp $
8 * Authors: Christophe Massiot <massiot@via.ecp.fr>, largely inspired by the
9 * work done by the livid project <http://www.linuxvideo.org/>
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
24 *****************************************************************************/
26 #define MODULE_NAME motionmmxext
27 #include "modules_inner.h"
29 /*****************************************************************************
30 * Preamble
31 *****************************************************************************/
32 #include "defs.h"
34 #include "config.h"
35 #include "common.h"
36 #include "threads.h"
37 #include "mtime.h"
39 #include "video.h"
41 #include "attributes.h"
42 #include "mmx.h"
44 /* OK, I know, this code has been taken from livid's mpeg2dec --Meuuh */
/* 0x01 in each of the eight bytes. The X_Y routines AND it into mm7 so that
 * the value later subtracted from the averaged result is 0 or 1 per byte. */
46 static mmx_t mask_one = {0x0101010101010101LL};
49 /* Useful functions */
/*
 * Short aliases for the byte-average macros pavgb_r2r / pavgb_m2r, which are
 * not defined in this file (presumably provided by mmx.h).
 *
 * src  - first operand (register for _r2r, memory for _m2r)
 * dest - register that receives the average
 *
 * The aliases carry no trailing semicolon: the call site supplies it, so an
 * invocation expands to exactly one statement and stays well-formed inside
 * an unbraced "if (...) pavg_m2r (...); else ...".
 */
#define pavg_r2r(src,dest) pavgb_r2r (src, dest)
#define pavg_m2r(src,dest) pavgb_m2r (src, dest)
/*
 * __MotionComponent_x_y_copy(width,height)
 *
 * Generates MotionComponent_x_y_copy_<width>_<height> (name passed through
 * _M()): a straight copy of `height` rows from p_src to p_dest.
 *   p_src    - first source byte of the block
 *   p_dest   - first destination byte of the block
 *   i_stride - distance added to both pointers after each row
 * Each row moves 8 bytes through mm0, plus 8 more through mm1 when
 * width == 16. All eight MMX registers are cleared on entry although only
 * mm0/mm1 are used afterwards.
 * NOTE(review): the embedded listing numbers skip line 83 (most likely the
 * closing brace of the function) - confirm against the upstream file.
 */
55 #define __MotionComponent_x_y_copy(width,height) \
56 void _M(MotionComponent_x_y_copy_##width##_##height)(yuv_data_t * p_src, \
57 yuv_data_t * p_dest, \
58 int i_stride) \
59 { \
60 int i_y; \
62 pxor_r2r (mm0, mm0); /* clear mm0..mm7 (xor of a register with itself) */ \
63 pxor_r2r (mm1, mm1); \
64 pxor_r2r (mm2, mm2); \
65 pxor_r2r (mm3, mm3); \
66 pxor_r2r (mm4, mm4); \
67 pxor_r2r (mm5, mm5); \
68 pxor_r2r (mm6, mm6); \
69 pxor_r2r (mm7, mm7); \
71 for( i_y = 0; i_y < height; i_y ++ ) \
72 { \
73 movq_m2r( *p_src, mm0 ); /* load 8 ref bytes */ \
74 if( width == 16 ) \
75 movq_m2r( *(p_src + 8), mm1 ); /* bytes 8..15 of a 16-wide row */ \
76 p_src += i_stride; \
78 movq_r2m( mm0, *p_dest ); /* store 8 bytes at curr */ \
79 if( width == 16 ) \
80 movq_r2m( mm1, *(p_dest + 8) ); \
81 p_dest += i_stride; \
82 } \
/*
 * __MotionComponent_X_y_copy(width,height)
 *
 * Generates MotionComponent_X_y_copy_<width>_<height> (name passed through
 * _M()). For each of `height` rows, every source byte is combined with its
 * right-hand neighbour (p_src + 1) through pavg and the result is stored to
 * p_dest; both pointers then advance by i_stride. mm0 handles bytes 0..7,
 * mm1 handles bytes 8..15 when width == 16.
 * pavg_m2r is the file's alias for pavgb_m2r - presumably the MMXEXT rounded
 * unsigned byte average; confirm in mmx.h.
 * NOTE(review): the embedded listing numbers skip lines 105-106 (most likely
 * the closing braces) - confirm against the upstream file.
 */
85 #define __MotionComponent_X_y_copy(width,height) \
86 void _M(MotionComponent_X_y_copy_##width##_##height)(yuv_data_t * p_src, \
87 yuv_data_t * p_dest, \
88 int i_stride) \
89 { \
90 int i_y; \
92 for( i_y = 0; i_y < height; i_y ++ ) \
93 { \
94 movq_m2r (*p_src, mm0); /* row bytes 0..7 */ \
95 if( width == 16 ) \
96 movq_m2r (*(p_src + 8), mm1); /* row bytes 8..15 */ \
97 pavg_m2r (*(p_src + 1), mm0); /* average with the bytes one to the right */ \
98 if( width == 16 ) \
99 pavg_m2r (*(p_src + 9), mm1); \
100 movq_r2m (mm0, *p_dest); \
101 p_src += i_stride; \
102 if( width == 16 ) \
103 movq_r2m (mm1, *(p_dest + 8)); \
104 p_dest += i_stride; \
/*
 * __MotionComponent_x_Y_copy(width,height)
 *
 * Generates MotionComponent_x_Y_copy_<width>_<height> (name passed through
 * _M()). For each of `height` rows, every source byte is combined with the
 * byte directly below it (p_next_src = p_src + i_stride) through pavg and
 * the result is stored to p_dest. p_src, p_next_src and p_dest all advance
 * by i_stride per row, so rows 0..height of the source are read.
 * pavg_m2r is the file's alias for pavgb_m2r - presumably the MMXEXT rounded
 * unsigned byte average; confirm in mmx.h.
 * NOTE(review): the embedded listing numbers skip lines 112, 117 and 130-131
 * (most likely the braces) - confirm against the upstream file.
 */
108 #define __MotionComponent_x_Y_copy(width,height) \
109 void _M(MotionComponent_x_Y_copy_##width##_##height)(yuv_data_t * p_src, \
110 yuv_data_t * p_dest, \
111 int i_stride) \
113 int i_y; \
114 yuv_data_t * p_next_src = p_src + i_stride; /* row below the current one */ \
116 for( i_y = 0; i_y < height; i_y ++ ) \
118 movq_m2r (*p_src, mm0); /* row bytes 0..7 */ \
119 if( width == 16 ) \
120 movq_m2r (*(p_src + 8), mm1); /* row bytes 8..15 */ \
121 pavg_m2r (*(p_next_src), mm0); /* average with the row below */ \
122 if( width == 16 ) \
123 pavg_m2r (*(p_next_src + 8), mm1); \
124 movq_r2m (mm0, *p_dest); \
125 p_src += i_stride; \
126 p_next_src += i_stride; \
127 if( width == 16 ) \
128 movq_r2m (mm1, *(p_dest + 8)); \
129 p_dest += i_stride; \
/*
 * __MotionComponent_X_Y_copy(width,height)
 *
 * Generates MotionComponent_X_Y_copy_<width>_<height> (name passed through
 * _M()). Each output byte is built from four source bytes: the byte itself,
 * its right neighbour, and the same two positions one row down.
 * The comments below read the *_r2r / *_m2r operands as (source,
 * destination), following the parameter names of the pavg_* aliases, and
 * assume the mmx.h macros map onto the x86 instructions of the same name.
 *
 * Common scheme: two pairwise averages are averaged again, then a per-byte
 * value of 0 or 1 is subtracted. That value is
 *   ((xor of pair 1) | (xor of pair 2)) & (avg 1 ^ avg 2) & mask_one
 * - presumably a correction for the rounding accumulated by the nested
 * averages; confirm against the pavgb definition.
 *
 * width == 16: every iteration loads all four operands for bytes 0..7 and
 * then again for bytes 8..15, pairing the samples diagonally.
 * otherwise: the first row's horizontal average and xor are computed before
 * the loop; each iteration loads only the next row, pairs the samples
 * horizontally, and carries that row's average (mm2 -> mm0) and xor
 * (mm6 -> mm7) into the following iteration.
 * Both branches read source rows 0..height and one byte past the block
 * width.
 * NOTE(review): the embedded listing numbers skip the brace-only lines of
 * this macro (e.g. 137, 141, 143, 183-184, 186, 195, 213-215) - confirm
 * against the upstream file.
 */
133 #define __MotionComponent_X_Y_copy(width,height) \
134 void _M(MotionComponent_X_Y_copy_##width##_##height)(yuv_data_t * p_src, \
135 yuv_data_t * p_dest, \
136 int i_stride) \
138 int i_y; \
140 if( width == 16 ) \
142 for( i_y = 0; i_y < height; i_y ++ ) \
144 movq_m2r (*p_src, mm0); /* a: current row */ \
145 movq_m2r (*(p_src+i_stride+1), mm1); /* d: next row, one byte right */ \
146 movq_r2r (mm0, mm7); \
147 movq_m2r (*(p_src+1), mm2); /* b: current row, one byte right */ \
148 pxor_r2r (mm1, mm7); /* mm7 = a ^ d */ \
149 movq_m2r (*(p_src + i_stride), mm3); /* c: next row */ \
150 movq_r2r (mm2, mm6); \
151 pxor_r2r (mm3, mm6); /* mm6 = b ^ c */ \
152 pavg_r2r (mm1, mm0); /* mm0 = avg(a, d) */ \
153 pavg_r2r (mm3, mm2); /* mm2 = avg(b, c) */ \
154 por_r2r (mm6, mm7); /* mm7 = (a ^ d) | (b ^ c) */ \
155 movq_r2r (mm0, mm6); \
156 pxor_r2r (mm2, mm6); /* mm6 = avg(a, d) ^ avg(b, c) */ \
157 pand_r2r (mm6, mm7); \
158 pand_m2r (mask_one, mm7); /* keep bit 0 of every byte */ \
159 pavg_r2r (mm2, mm0); /* average of the two averages */ \
160 psubusb_r2r (mm7, mm0); /* subtract the 0/1 correction */ \
161 movq_r2m (mm0, *p_dest); \
163 movq_m2r (*(p_src+8), mm0); /* same sequence for bytes 8..15 */ \
164 movq_m2r (*(p_src+i_stride+9), mm1); \
165 movq_r2r (mm0, mm7); \
166 movq_m2r (*(p_src+9), mm2); \
167 pxor_r2r (mm1, mm7); \
168 movq_m2r (*(p_src+i_stride+8), mm3); \
169 movq_r2r (mm2, mm6); \
170 pxor_r2r (mm3, mm6); \
171 pavg_r2r (mm1, mm0); \
172 pavg_r2r (mm3, mm2); \
173 por_r2r (mm6, mm7); \
174 movq_r2r (mm0, mm6); \
175 pxor_r2r (mm2, mm6); \
176 pand_r2r (mm6, mm7); \
177 pand_m2r (mask_one, mm7); \
178 pavg_r2r (mm2, mm0); \
179 psubusb_r2r (mm7, mm0); \
180 p_src += i_stride; \
181 movq_r2m (mm0, *(p_dest+8)); \
182 p_dest += i_stride; \
185 else \
187 movq_m2r (*p_src, mm0); /* before the loop: first row */ \
188 movq_m2r (*(p_src+1), mm1); \
189 movq_r2r (mm0, mm7); \
190 pxor_r2r (mm1, mm7); /* mm7 = row ^ (row shifted by one byte) */ \
191 pavg_r2r (mm1, mm0); /* mm0 = horizontal average of the first row */ \
192 p_src += i_stride; \
194 for( i_y = 0; i_y < height; i_y ++ ) \
196 movq_m2r (*p_src, mm2); /* next row */ \
197 movq_r2r (mm0, mm5); \
198 movq_m2r (*(p_src+1), mm3); \
199 movq_r2r (mm2, mm6); \
200 pxor_r2r (mm3, mm6); /* mm6 = xor of the next row's pair */ \
201 pavg_r2r (mm3, mm2); /* mm2 = horizontal average of the next row */ \
202 por_r2r (mm6, mm7); \
203 pxor_r2r (mm2, mm5); /* mm5 = previous average ^ new average */ \
204 pand_r2r (mm5, mm7); \
205 pavg_r2r (mm2, mm0); /* average of the two row averages */ \
206 pand_m2r (mask_one, mm7); \
207 psubusb_r2r (mm7, mm0); /* subtract the 0/1 correction */ \
208 p_src += i_stride; \
209 movq_r2m (mm0, *p_dest); \
210 p_dest += i_stride; \
211 movq_r2r (mm6, mm7); /* carry this row's xor to the next iteration */ \
212 movq_r2r (mm2, mm0); /* carry this row's average to the next iteration */ \
/*
 * __MotionComponent_x_y_avg(width,height)
 *
 * Generates MotionComponent_x_y_avg_<width>_<height> (name passed through
 * _M()). For each of `height` rows the source bytes are combined through
 * pavg with the bytes already at p_dest and the result is written back to
 * p_dest; both pointers then advance by i_stride. mm0 handles bytes 0..7,
 * mm1 handles bytes 8..15 when width == 16.
 * pavg_m2r is the file's alias for pavgb_m2r - presumably the MMXEXT rounded
 * unsigned byte average; confirm in mmx.h.
 * NOTE(review): the embedded listing numbers skip lines 221, 225 and 237-238
 * (most likely the braces) - confirm against the upstream file.
 */
217 #define __MotionComponent_x_y_avg(width,height) \
218 void _M(MotionComponent_x_y_avg_##width##_##height)(yuv_data_t * p_src, \
219 yuv_data_t * p_dest, \
220 int i_stride) \
222 int i_y; \
224 for( i_y = 0; i_y < height; i_y ++ ) \
226 movq_m2r( *p_src, mm0 ); /* source bytes 0..7 */ \
227 if( width == 16 ) \
228 movq_m2r( *(p_src + 8), mm1 ); /* source bytes 8..15 */ \
229 pavg_m2r( *p_dest, mm0 ); /* average with the current destination */ \
230 if( width == 16 ) \
231 pavg_m2r( *(p_dest + 8), mm1 ); \
232 movq_r2m( mm0, *p_dest ); \
233 p_src += i_stride; \
234 if( width == 16 ) \
235 movq_r2m( mm1, *(p_dest + 8) ); \
236 p_dest += i_stride; \
/*
 * __MotionComponent_X_y_avg(width,height)
 *
 * Generates MotionComponent_X_y_avg_<width>_<height> (name passed through
 * _M()). For each of `height` rows every source byte is first combined
 * through pavg with its right-hand neighbour (p_src + 1); that result is
 * then combined through pavg with the bytes already at p_dest and written
 * back to p_dest. Both pointers advance by i_stride per row.
 * pavg_m2r is the file's alias for pavgb_m2r - presumably the MMXEXT rounded
 * unsigned byte average; confirm in mmx.h.
 * NOTE(review): the embedded listing numbers skip lines 244, 248 and 263-264
 * (most likely the braces) - confirm against the upstream file.
 */
240 #define __MotionComponent_X_y_avg(width,height) \
241 void _M(MotionComponent_X_y_avg_##width##_##height)(yuv_data_t * p_src, \
242 yuv_data_t * p_dest, \
243 int i_stride) \
245 int i_y; \
247 for( i_y = 0; i_y < height; i_y ++ ) \
249 movq_m2r (*p_src, mm0); /* source bytes 0..7 */ \
250 if( width == 16 ) \
251 movq_m2r (*(p_src + 8), mm1); /* source bytes 8..15 */ \
252 pavg_m2r (*(p_src + 1), mm0); /* average with the bytes one to the right */ \
253 if( width == 16 ) \
254 pavg_m2r (*(p_src + 9), mm1); \
255 pavg_m2r (*p_dest, mm0); /* then average with the current destination */ \
256 if( width == 16 ) \
257 pavg_m2r (*(p_dest + 8), mm1); \
258 p_src += i_stride; \
259 movq_r2m (mm0, *p_dest); \
260 if( width == 16 ) \
261 movq_r2m (mm1, *(p_dest + 8)); \
262 p_dest += i_stride; \
/*
 * __MotionComponent_x_Y_avg(width,height)
 *
 * Generates MotionComponent_x_Y_avg_<width>_<height> (name passed through
 * _M()). For each of `height` rows every source byte is first combined
 * through pavg with the byte directly below it (p_next_src = p_src +
 * i_stride); that result is then combined through pavg with the bytes
 * already at p_dest and written back to p_dest. p_src, p_next_src and p_dest
 * all advance by i_stride per row.
 * pavg_m2r is the file's alias for pavgb_m2r - presumably the MMXEXT rounded
 * unsigned byte average; confirm in mmx.h.
 * NOTE(review): the embedded listing numbers skip lines 270, 275 and 291-292
 * (most likely the braces) - confirm against the upstream file.
 */
266 #define __MotionComponent_x_Y_avg(width,height) \
267 void _M(MotionComponent_x_Y_avg_##width##_##height)(yuv_data_t * p_src, \
268 yuv_data_t * p_dest, \
269 int i_stride) \
271 int i_y; \
272 yuv_data_t * p_next_src = p_src + i_stride; /* row below the current one */ \
274 for( i_y = 0; i_y < height; i_y ++ ) \
276 movq_m2r (*p_src, mm0); /* source bytes 0..7 */ \
277 if( width == 16 ) \
278 movq_m2r (*(p_src + 8), mm1); /* source bytes 8..15 */ \
279 pavg_m2r (*(p_next_src), mm0); /* average with the row below */ \
280 if( width == 16 ) \
281 pavg_m2r (*(p_next_src + 8), mm1); \
282 pavg_m2r (*p_dest, mm0); /* then average with the current destination */ \
283 if( width == 16 ) \
284 pavg_m2r (*(p_dest + 8), mm1); \
285 p_src += i_stride; \
286 p_next_src += i_stride; \
287 movq_r2m (mm0, *p_dest); \
288 if( width == 16 ) \
289 movq_r2m (mm1, *(p_dest + 8)); \
290 p_dest += i_stride; \
/*
 * __MotionComponent_X_Y_avg(width,height)
 *
 * Generates MotionComponent_X_Y_avg_<width>_<height> (name passed through
 * _M()). Each prediction byte is built from four source bytes (the byte
 * itself, its right neighbour, and the same two positions one row down);
 * the prediction is then combined through pavg with the byte already at
 * p_dest and written back.
 * The comments below read the *_r2r / *_m2r operands as (source,
 * destination), following the parameter names of the pavg_* aliases, and
 * assume the mmx.h macros map onto the x86 instructions of the same name.
 *
 * The four-sample step averages two diagonal pairs, averages the two
 * results, and subtracts a per-byte value of 0 or 1 equal to
 *   ((a ^ d) | (b ^ c)) & (avg(a, d) ^ avg(b, c)) & mask_one
 * - presumably a correction for the rounding accumulated by the nested
 * averages; confirm against the pavgb definition.
 *
 * width == 16 runs the sequence twice per row (bytes 0..7, then 8..15);
 * the other branch runs it once per row. Both read source rows 0..height
 * and one byte past the block width.
 * NOTE(review): the embedded listing numbers skip the brace-only lines of
 * this macro (e.g. 298, 302, 304, 348-349, 351, 353, 376-378) - confirm
 * against the upstream file.
 */
294 #define __MotionComponent_X_Y_avg(width,height) \
295 void _M(MotionComponent_X_Y_avg_##width##_##height)(yuv_data_t * p_src, \
296 yuv_data_t * p_dest, \
297 int i_stride) \
299 int i_y; \
301 if( width == 16 ) \
303 for( i_y = 0; i_y < height; i_y ++ ) \
305 movq_m2r (*p_src, mm0); /* a: current row */ \
306 movq_m2r (*(p_src+i_stride+1), mm1); /* d: next row, one byte right */ \
307 movq_r2r (mm0, mm7); \
308 movq_m2r (*(p_src+1), mm2); /* b: current row, one byte right */ \
309 pxor_r2r (mm1, mm7); /* mm7 = a ^ d */ \
310 movq_m2r (*(p_src+i_stride), mm3); /* c: next row */ \
311 movq_r2r (mm2, mm6); \
312 pxor_r2r (mm3, mm6); /* mm6 = b ^ c */ \
313 pavg_r2r (mm1, mm0); /* mm0 = avg(a, d) */ \
314 pavg_r2r (mm3, mm2); /* mm2 = avg(b, c) */ \
315 por_r2r (mm6, mm7); \
316 movq_r2r (mm0, mm6); \
317 pxor_r2r (mm2, mm6); /* mm6 = avg(a, d) ^ avg(b, c) */ \
318 pand_r2r (mm6, mm7); \
319 pand_m2r (mask_one, mm7); /* keep bit 0 of every byte */ \
320 pavg_r2r (mm2, mm0); /* average of the two averages */ \
321 psubusb_r2r (mm7, mm0); /* subtract the 0/1 correction */ \
322 movq_m2r (*p_dest, mm1); /* current destination bytes */ \
323 pavg_r2r (mm1, mm0); /* average prediction with destination */ \
324 movq_r2m (mm0, *p_dest); \
326 movq_m2r (*(p_src+8), mm0); /* same sequence for bytes 8..15 */ \
327 movq_m2r (*(p_src+i_stride+9), mm1); \
328 movq_r2r (mm0, mm7); \
329 movq_m2r (*(p_src+9), mm2); \
330 pxor_r2r (mm1, mm7); \
331 movq_m2r (*(p_src+i_stride+8), mm3); \
332 movq_r2r (mm2, mm6); \
333 pxor_r2r (mm3, mm6); \
334 pavg_r2r (mm1, mm0); \
335 pavg_r2r (mm3, mm2); \
336 por_r2r (mm6, mm7); \
337 movq_r2r (mm0, mm6); \
338 pxor_r2r (mm2, mm6); \
339 pand_r2r (mm6, mm7); \
340 pand_m2r (mask_one, mm7); \
341 pavg_r2r (mm2, mm0); \
342 psubusb_r2r (mm7, mm0); \
343 movq_m2r (*(p_dest+8), mm1); \
344 pavg_r2r (mm1, mm0); \
345 p_src += i_stride; \
346 movq_r2m (mm0, *(p_dest+8)); \
347 p_dest += i_stride; \
350 else \
352 for( i_y = 0; i_y < height; i_y ++ ) \
354 movq_m2r (*p_src, mm0); /* a: current row */ \
355 movq_m2r (*(p_src+i_stride+1), mm1); /* d: next row, one byte right */ \
356 movq_r2r (mm0, mm7); \
357 movq_m2r (*(p_src+1), mm2); /* b: current row, one byte right */ \
358 pxor_r2r (mm1, mm7); \
359 movq_m2r (*(p_src+i_stride), mm3); /* c: next row */ \
360 movq_r2r (mm2, mm6); \
361 pxor_r2r (mm3, mm6); \
362 pavg_r2r (mm1, mm0); \
363 pavg_r2r (mm3, mm2); \
364 por_r2r (mm6, mm7); \
365 movq_r2r (mm0, mm6); \
366 pxor_r2r (mm2, mm6); \
367 pand_r2r (mm6, mm7); \
368 pand_m2r (mask_one, mm7); \
369 pavg_r2r (mm2, mm0); \
370 psubusb_r2r (mm7, mm0); \
371 movq_m2r (*p_dest, mm1); /* current destination bytes */ \
372 pavg_r2r (mm1, mm0); /* average prediction with destination */ \
373 p_src += i_stride; \
374 movq_r2m (mm0, *p_dest); \
375 p_dest += i_stride; \
/*
 * __MotionComponents(width,height)
 *
 * Expands all eight generator macros for one block size, defining the
 * x_y, X_y, x_Y and X_Y routines in both their _copy and _avg forms for
 * that (width,height) pair.
 */
380 #define __MotionComponents(width,height) \
381 __MotionComponent_x_y_copy(width,height) \
382 __MotionComponent_X_y_copy(width,height) \
383 __MotionComponent_x_Y_copy(width,height) \
384 __MotionComponent_X_Y_copy(width,height) \
385 __MotionComponent_x_y_avg(width,height) \
386 __MotionComponent_X_y_avg(width,height) \
387 __MotionComponent_x_Y_avg(width,height) \
388 __MotionComponent_X_Y_avg(width,height)
/*
 * Instantiate the eight routines for each block size in use: 16x16, 16x8,
 * 8x8 and 8x4. The 8x16 set is compiled out by the #if 0 below.
 */
390 __MotionComponents (16,16) /* 444, 422, 420 */
391 __MotionComponents (16,8) /* 444, 422, 420 */
392 __MotionComponents (8,8) /* 422, 420 */
393 __MotionComponents (8,4) /* 420 */
394 #if 0
395 __MotionComponents (8,16) /* 422 */
396 #endif