add_savefile: remove callback parameter
[vlc/asuraparaju-public.git] / modules / video_chroma / i420_yuy2.c
blob09a904f951593daaf124516cc71212867c77e361
1 /*****************************************************************************
2 * i420_yuy2.c : YUV to YUV conversion module for vlc
3 *****************************************************************************
4 * Copyright (C) 2000, 2001 the VideoLAN team
5 * $Id$
7 * Authors: Samuel Hocevar <sam@zoy.org>
8 * Damien Fouilleul <damien@videolan.org>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
23 *****************************************************************************/
25 /*****************************************************************************
26 * Preamble
27 *****************************************************************************/
29 #ifdef HAVE_CONFIG_H
30 # include "config.h"
31 #endif
33 #include <vlc_common.h>
34 #include <vlc_plugin.h>
35 #include <vlc_filter.h>
37 #if defined (MODULE_NAME_IS_i420_yuy2_altivec) && defined(HAVE_ALTIVEC_H)
38 # include <altivec.h>
39 #endif
41 #include "i420_yuy2.h"
43 #define SRC_FOURCC "I420,IYUV,YV12"
45 #if defined (MODULE_NAME_IS_i420_yuy2)
46 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv,Y211"
47 #elif defined (MODULE_NAME_IS_i420_yuy2_mmx)
48 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv"
49 #elif defined (MODULE_NAME_IS_i420_yuy2_sse2)
50 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv"
51 #elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
52 # define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422"
53 #endif
55 /*****************************************************************************
56 * Local and extern prototypes.
57 *****************************************************************************/
58 static int Activate ( vlc_object_t * );
60 static void I420_YUY2 ( filter_t *, picture_t *, picture_t * );
61 static void I420_YVYU ( filter_t *, picture_t *, picture_t * );
62 static void I420_UYVY ( filter_t *, picture_t *, picture_t * );
63 static picture_t *I420_YUY2_Filter ( filter_t *, picture_t * );
64 static picture_t *I420_YVYU_Filter ( filter_t *, picture_t * );
65 static picture_t *I420_UYVY_Filter ( filter_t *, picture_t * );
66 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
67 static void I420_IUYV ( filter_t *, picture_t *, picture_t * );
68 static void I420_cyuv ( filter_t *, picture_t *, picture_t * );
69 static picture_t *I420_IUYV_Filter ( filter_t *, picture_t * );
70 static picture_t *I420_cyuv_Filter ( filter_t *, picture_t * );
71 #endif
72 #if defined (MODULE_NAME_IS_i420_yuy2)
73 static void I420_Y211 ( filter_t *, picture_t *, picture_t * );
74 static picture_t *I420_Y211_Filter ( filter_t *, picture_t * );
75 #endif
#ifdef MODULE_NAME_IS_i420_yuy2_mmx
/* Initialize MMX-specific constants */
/* NOTE(review): not referenced in this file; presumably consumed by the MMX
 * macros in i420_yuy2.h - confirm before renaming or removing. */
static const uint64_t i_00ffw = 0x00ff00ff00ff00ffULL;
static const uint64_t i_80w   = 0x0000000080808080ULL;
#endif
83 /*****************************************************************************
84 * Module descriptor.
85 *****************************************************************************/
86 vlc_module_begin ()
87 #if defined (MODULE_NAME_IS_i420_yuy2)
88 set_description( N_("Conversions from " SRC_FOURCC " to " DEST_FOURCC) )
89 set_capability( "video filter2", 80 )
90 #elif defined (MODULE_NAME_IS_i420_yuy2_mmx)
91 set_description( N_("MMX conversions from " SRC_FOURCC " to " DEST_FOURCC) )
92 set_capability( "video filter2", 160 )
93 #elif defined (MODULE_NAME_IS_i420_yuy2_sse2)
94 set_description( N_("SSE2 conversions from " SRC_FOURCC " to " DEST_FOURCC) )
95 set_capability( "video filter2", 250 )
96 #elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
97 set_description(
98 _("AltiVec conversions from " SRC_FOURCC " to " DEST_FOURCC) );
99 set_capability( "video filter2", 250 )
100 #endif
101 set_callbacks( Activate, NULL )
102 vlc_module_end ()
104 /*****************************************************************************
105 * Activate: allocate a chroma function
106 *****************************************************************************
107 * This function allocates and initializes a chroma function
108 *****************************************************************************/
109 static int Activate( vlc_object_t *p_this )
111 filter_t *p_filter = (filter_t *)p_this;
113 if( p_filter->fmt_in.video.i_width & 1
114 || p_filter->fmt_in.video.i_height & 1 )
116 return -1;
119 if( p_filter->fmt_in.video.i_width != p_filter->fmt_out.video.i_width
120 || p_filter->fmt_in.video.i_height != p_filter->fmt_out.video.i_height )
121 return -1;
123 switch( p_filter->fmt_in.video.i_chroma )
125 case VLC_CODEC_YV12:
126 case VLC_CODEC_I420:
127 switch( p_filter->fmt_out.video.i_chroma )
129 case VLC_CODEC_YUYV:
130 p_filter->pf_video_filter = I420_YUY2_Filter;
131 break;
133 case VLC_CODEC_YVYU:
134 p_filter->pf_video_filter = I420_YVYU_Filter;
135 break;
137 case VLC_CODEC_UYVY:
138 p_filter->pf_video_filter = I420_UYVY_Filter;
139 break;
140 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
141 case VLC_FOURCC('I','U','Y','V'):
142 p_filter->pf_video_filter = I420_IUYV_Filter;
143 break;
145 case VLC_CODEC_CYUV:
146 p_filter->pf_video_filter = I420_cyuv_Filter;
147 break;
148 #endif
150 #if defined (MODULE_NAME_IS_i420_yuy2)
151 case VLC_CODEC_Y211:
152 p_filter->pf_video_filter = I420_Y211_Filter;
153 break;
154 #endif
156 default:
157 return -1;
159 break;
161 default:
162 return -1;
165 return 0;
#if 0
/*
 * Debug helper, compiled out: returns the CPU time-stamp counter read with
 * the x86 "rdtsc" instruction.
 * NOTE(review): the "=A" output constraint names the edx:eax pair as one
 * 64-bit value only on 32-bit x86; on x86-64 the high half would have to be
 * fetched from edx separately - check the GCC constraint documentation
 * before re-enabling this.
 */
static inline unsigned long long read_cycles(void)
{
    unsigned long long v;
    __asm__ __volatile__("rdtsc" : "=A" (v): );

    return v;
}
#endif
178 /* Following functions are local */
180 VIDEO_FILTER_WRAPPER( I420_YUY2 )
181 VIDEO_FILTER_WRAPPER( I420_YVYU )
182 VIDEO_FILTER_WRAPPER( I420_UYVY )
183 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
184 VIDEO_FILTER_WRAPPER( I420_IUYV )
185 VIDEO_FILTER_WRAPPER( I420_cyuv )
186 #endif
187 #if defined (MODULE_NAME_IS_i420_yuy2)
188 VIDEO_FILTER_WRAPPER( I420_Y211 )
189 #endif
191 /*****************************************************************************
192 * I420_YUY2: planar YUV 4:2:0 to packed YUYV 4:2:2
193 *****************************************************************************/
194 static void I420_YUY2( filter_t *p_filter, picture_t *p_source,
195 picture_t *p_dest )
197 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
198 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
199 uint8_t *p_u = p_source->U_PIXELS;
200 uint8_t *p_v = p_source->V_PIXELS;
202 int i_x, i_y;
204 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
205 #define VEC_NEXT_LINES( ) \
206 p_line1 = p_line2; \
207 p_line2 += p_dest->p->i_pitch; \
208 p_y1 = p_y2; \
209 p_y2 += p_source->p[Y_PLANE].i_pitch;
211 #define VEC_LOAD_UV( ) \
212 u_vec = vec_ld( 0, p_u ); p_u += 16; \
213 v_vec = vec_ld( 0, p_v ); p_v += 16;
215 #define VEC_MERGE( a ) \
216 uv_vec = a( u_vec, v_vec ); \
217 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
218 vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
219 vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
220 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
221 vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16; \
222 vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16;
224 vector unsigned char u_vec;
225 vector unsigned char v_vec;
226 vector unsigned char uv_vec;
227 vector unsigned char y_vec;
229 if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
230 ( p_filter->fmt_in.video.i_height % 2 ) ) )
232 /* Width is a multiple of 32, we take 2 lines at a time */
233 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
235 VEC_NEXT_LINES( );
236 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
238 VEC_LOAD_UV( );
239 VEC_MERGE( vec_mergeh );
240 VEC_MERGE( vec_mergel );
244 #warning FIXME: converting widths % 16 but !widths % 32 is broken on altivec
245 #if 0
246 else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
247 ( p_filter->fmt_in.video.i_height % 4 ) ) )
249 /* Width is only a multiple of 16, we take 4 lines at a time */
250 for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
252 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
253 VEC_NEXT_LINES( );
254 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
256 VEC_LOAD_UV( );
257 VEC_MERGE( vec_mergeh );
258 VEC_MERGE( vec_mergel );
261 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
262 VEC_LOAD_UV( );
263 VEC_MERGE( vec_mergeh );
265 /* Line 3 and 4, pixels 0 to 16 */
266 VEC_NEXT_LINES( );
267 VEC_MERGE( vec_mergel );
269 /* Line 3 and 4, pixels 16 to ( width ) */
270 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
272 VEC_LOAD_UV( );
273 VEC_MERGE( vec_mergeh );
274 VEC_MERGE( vec_mergel );
278 #endif
279 else
281 /* Crap, use the C version */
282 #undef VEC_NEXT_LINES
283 #undef VEC_LOAD_UV
284 #undef VEC_MERGE
285 #endif
287 const int i_source_margin = p_source->p[0].i_pitch
288 - p_source->p[0].i_visible_pitch;
289 const int i_source_margin_c = p_source->p[1].i_pitch
290 - p_source->p[1].i_visible_pitch;
291 const int i_dest_margin = p_dest->p->i_pitch
292 - p_dest->p->i_visible_pitch;
294 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
295 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
297 p_line1 = p_line2;
298 p_line2 += p_dest->p->i_pitch;
300 p_y1 = p_y2;
301 p_y2 += p_source->p[Y_PLANE].i_pitch;
303 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
304 for( i_x = p_filter->fmt_in.video.i_width / 8; i_x-- ; )
306 C_YUV420_YUYV( );
307 C_YUV420_YUYV( );
308 C_YUV420_YUYV( );
309 C_YUV420_YUYV( );
311 #else
312 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
314 MMX_CALL( MMX_YUV420_YUYV );
316 #endif
317 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
319 C_YUV420_YUYV( );
322 p_y1 += i_source_margin;
323 p_y2 += i_source_margin;
324 p_u += i_source_margin_c;
325 p_v += i_source_margin_c;
326 p_line1 += i_dest_margin;
327 p_line2 += i_dest_margin;
330 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
331 /* re-enable FPU registers */
332 MMX_END;
333 #endif
335 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
337 #endif
339 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
341 ** SSE2 128 bits fetch/store instructions are faster
342 ** if memory access is 16 bytes aligned
345 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
346 ((intptr_t)p_line2|(intptr_t)p_y2))) )
348 /* use faster SSE2 aligned fetch and store */
349 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
351 p_line1 = p_line2;
352 p_line2 += p_dest->p->i_pitch;
354 p_y1 = p_y2;
355 p_y2 += p_source->p[Y_PLANE].i_pitch;
357 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
359 SSE2_CALL( SSE2_YUV420_YUYV_ALIGNED );
361 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
363 C_YUV420_YUYV( );
366 p_y1 += i_source_margin;
367 p_y2 += i_source_margin;
368 p_u += i_source_margin_c;
369 p_v += i_source_margin_c;
370 p_line1 += i_dest_margin;
371 p_line2 += i_dest_margin;
374 else
376 /* use slower SSE2 unaligned fetch and store */
377 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
379 p_line1 = p_line2;
380 p_line2 += p_dest->p->i_pitch;
382 p_y1 = p_y2;
383 p_y2 += p_source->p[Y_PLANE].i_pitch;
385 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
387 SSE2_CALL( SSE2_YUV420_YUYV_UNALIGNED );
389 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
391 C_YUV420_YUYV( );
394 p_y1 += i_source_margin;
395 p_y2 += i_source_margin;
396 p_u += i_source_margin_c;
397 p_v += i_source_margin_c;
398 p_line1 += i_dest_margin;
399 p_line2 += i_dest_margin;
402 /* make sure all SSE2 stores are visible thereafter */
403 SSE2_END;
405 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
408 /*****************************************************************************
409 * I420_YVYU: planar YUV 4:2:0 to packed YVYU 4:2:2
410 *****************************************************************************/
411 static void I420_YVYU( filter_t *p_filter, picture_t *p_source,
412 picture_t *p_dest )
414 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
415 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
416 uint8_t *p_u = p_source->U_PIXELS;
417 uint8_t *p_v = p_source->V_PIXELS;
419 int i_x, i_y;
421 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
422 #define VEC_NEXT_LINES( ) \
423 p_line1 = p_line2; \
424 p_line2 += p_dest->p->i_pitch; \
425 p_y1 = p_y2; \
426 p_y2 += p_source->p[Y_PLANE].i_pitch;
428 #define VEC_LOAD_UV( ) \
429 u_vec = vec_ld( 0, p_u ); p_u += 16; \
430 v_vec = vec_ld( 0, p_v ); p_v += 16;
432 #define VEC_MERGE( a ) \
433 vu_vec = a( v_vec, u_vec ); \
434 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
435 vec_st( vec_mergeh( y_vec, vu_vec ), 0, p_line1 ); p_line1 += 16; \
436 vec_st( vec_mergel( y_vec, vu_vec ), 0, p_line1 ); p_line1 += 16; \
437 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
438 vec_st( vec_mergeh( y_vec, vu_vec ), 0, p_line2 ); p_line2 += 16; \
439 vec_st( vec_mergel( y_vec, vu_vec ), 0, p_line2 ); p_line2 += 16;
441 vector unsigned char u_vec;
442 vector unsigned char v_vec;
443 vector unsigned char vu_vec;
444 vector unsigned char y_vec;
446 if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
447 ( p_filter->fmt_in.video.i_height % 2 ) ) )
449 /* Width is a multiple of 32, we take 2 lines at a time */
450 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
452 VEC_NEXT_LINES( );
453 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
455 VEC_LOAD_UV( );
456 VEC_MERGE( vec_mergeh );
457 VEC_MERGE( vec_mergel );
461 else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
462 ( p_filter->fmt_in.video.i_height % 4 ) ) )
464 /* Width is only a multiple of 16, we take 4 lines at a time */
465 for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
467 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
468 VEC_NEXT_LINES( );
469 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
471 VEC_LOAD_UV( );
472 VEC_MERGE( vec_mergeh );
473 VEC_MERGE( vec_mergel );
476 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
477 VEC_LOAD_UV( );
478 VEC_MERGE( vec_mergeh );
480 /* Line 3 and 4, pixels 0 to 16 */
481 VEC_NEXT_LINES( );
482 VEC_MERGE( vec_mergel );
484 /* Line 3 and 4, pixels 16 to ( width ) */
485 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
487 VEC_LOAD_UV( );
488 VEC_MERGE( vec_mergeh );
489 VEC_MERGE( vec_mergel );
493 else
495 /* Crap, use the C version */
496 #undef VEC_NEXT_LINES
497 #undef VEC_LOAD_UV
498 #undef VEC_MERGE
499 #endif
501 const int i_source_margin = p_source->p[0].i_pitch
502 - p_source->p[0].i_visible_pitch;
503 const int i_source_margin_c = p_source->p[1].i_pitch
504 - p_source->p[1].i_visible_pitch;
505 const int i_dest_margin = p_dest->p->i_pitch
506 - p_dest->p->i_visible_pitch;
508 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
509 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
511 p_line1 = p_line2;
512 p_line2 += p_dest->p->i_pitch;
514 p_y1 = p_y2;
515 p_y2 += p_source->p[Y_PLANE].i_pitch;
517 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
519 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
520 C_YUV420_YVYU( );
521 C_YUV420_YVYU( );
522 C_YUV420_YVYU( );
523 C_YUV420_YVYU( );
524 #else
525 MMX_CALL( MMX_YUV420_YVYU );
526 #endif
528 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
530 C_YUV420_YVYU( );
533 p_y1 += i_source_margin;
534 p_y2 += i_source_margin;
535 p_u += i_source_margin_c;
536 p_v += i_source_margin_c;
537 p_line1 += i_dest_margin;
538 p_line2 += i_dest_margin;
541 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
542 /* re-enable FPU registers */
543 MMX_END;
544 #endif
546 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
548 #endif
550 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
552 ** SSE2 128 bits fetch/store instructions are faster
553 ** if memory access is 16 bytes aligned
555 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
556 ((intptr_t)p_line2|(intptr_t)p_y2))) )
558 /* use faster SSE2 aligned fetch and store */
559 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
561 p_line1 = p_line2;
562 p_line2 += p_dest->p->i_pitch;
564 p_y1 = p_y2;
565 p_y2 += p_source->p[Y_PLANE].i_pitch;
567 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
569 SSE2_CALL( SSE2_YUV420_YVYU_ALIGNED );
571 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
573 C_YUV420_YVYU( );
576 p_y1 += i_source_margin;
577 p_y2 += i_source_margin;
578 p_u += i_source_margin_c;
579 p_v += i_source_margin_c;
580 p_line1 += i_dest_margin;
581 p_line2 += i_dest_margin;
584 else
586 /* use slower SSE2 unaligned fetch and store */
587 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
589 p_line1 = p_line2;
590 p_line2 += p_dest->p->i_pitch;
592 p_y1 = p_y2;
593 p_y2 += p_source->p[Y_PLANE].i_pitch;
595 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
597 SSE2_CALL( SSE2_YUV420_YVYU_UNALIGNED );
599 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
601 C_YUV420_YVYU( );
604 p_y1 += i_source_margin;
605 p_y2 += i_source_margin;
606 p_u += i_source_margin_c;
607 p_v += i_source_margin_c;
608 p_line1 += i_dest_margin;
609 p_line2 += i_dest_margin;
612 /* make sure all SSE2 stores are visible thereafter */
613 SSE2_END;
614 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
617 /*****************************************************************************
618 * I420_UYVY: planar YUV 4:2:0 to packed UYVY 4:2:2
619 *****************************************************************************/
620 static void I420_UYVY( filter_t *p_filter, picture_t *p_source,
621 picture_t *p_dest )
623 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
624 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
625 uint8_t *p_u = p_source->U_PIXELS;
626 uint8_t *p_v = p_source->V_PIXELS;
628 int i_x, i_y;
630 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
631 #define VEC_NEXT_LINES( ) \
632 p_line1 = p_line2; \
633 p_line2 += p_dest->p->i_pitch; \
634 p_y1 = p_y2; \
635 p_y2 += p_source->p[Y_PLANE].i_pitch;
637 #define VEC_LOAD_UV( ) \
638 u_vec = vec_ld( 0, p_u ); p_u += 16; \
639 v_vec = vec_ld( 0, p_v ); p_v += 16;
641 #define VEC_MERGE( a ) \
642 uv_vec = a( u_vec, v_vec ); \
643 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
644 vec_st( vec_mergeh( uv_vec, y_vec ), 0, p_line1 ); p_line1 += 16; \
645 vec_st( vec_mergel( uv_vec, y_vec ), 0, p_line1 ); p_line1 += 16; \
646 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
647 vec_st( vec_mergeh( uv_vec, y_vec ), 0, p_line2 ); p_line2 += 16; \
648 vec_st( vec_mergel( uv_vec, y_vec ), 0, p_line2 ); p_line2 += 16;
650 vector unsigned char u_vec;
651 vector unsigned char v_vec;
652 vector unsigned char uv_vec;
653 vector unsigned char y_vec;
655 if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
656 ( p_filter->fmt_in.video.i_height % 2 ) ) )
658 /* Width is a multiple of 32, we take 2 lines at a time */
659 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
661 VEC_NEXT_LINES( );
662 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
664 VEC_LOAD_UV( );
665 VEC_MERGE( vec_mergeh );
666 VEC_MERGE( vec_mergel );
670 else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
671 ( p_filter->fmt_in.video.i_height % 4 ) ) )
673 /* Width is only a multiple of 16, we take 4 lines at a time */
674 for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
676 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
677 VEC_NEXT_LINES( );
678 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
680 VEC_LOAD_UV( );
681 VEC_MERGE( vec_mergeh );
682 VEC_MERGE( vec_mergel );
685 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
686 VEC_LOAD_UV( );
687 VEC_MERGE( vec_mergeh );
689 /* Line 3 and 4, pixels 0 to 16 */
690 VEC_NEXT_LINES( );
691 VEC_MERGE( vec_mergel );
693 /* Line 3 and 4, pixels 16 to ( width ) */
694 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
696 VEC_LOAD_UV( );
697 VEC_MERGE( vec_mergeh );
698 VEC_MERGE( vec_mergel );
702 else
704 /* Crap, use the C version */
705 #undef VEC_NEXT_LINES
706 #undef VEC_LOAD_UV
707 #undef VEC_MERGE
708 #endif
710 const int i_source_margin = p_source->p[0].i_pitch
711 - p_source->p[0].i_visible_pitch;
712 const int i_source_margin_c = p_source->p[1].i_pitch
713 - p_source->p[1].i_visible_pitch;
714 const int i_dest_margin = p_dest->p->i_pitch
715 - p_dest->p->i_visible_pitch;
717 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
718 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
720 p_line1 = p_line2;
721 p_line2 += p_dest->p->i_pitch;
723 p_y1 = p_y2;
724 p_y2 += p_source->p[Y_PLANE].i_pitch;
726 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
728 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
729 C_YUV420_UYVY( );
730 C_YUV420_UYVY( );
731 C_YUV420_UYVY( );
732 C_YUV420_UYVY( );
733 #else
734 MMX_CALL( MMX_YUV420_UYVY );
735 #endif
737 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x--; )
739 C_YUV420_UYVY( );
742 p_y1 += i_source_margin;
743 p_y2 += i_source_margin;
744 p_u += i_source_margin_c;
745 p_v += i_source_margin_c;
746 p_line1 += i_dest_margin;
747 p_line2 += i_dest_margin;
750 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
751 /* re-enable FPU registers */
752 MMX_END;
753 #endif
755 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
757 #endif
759 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
761 ** SSE2 128 bits fetch/store instructions are faster
762 ** if memory access is 16 bytes aligned
764 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
765 ((intptr_t)p_line2|(intptr_t)p_y2))) )
767 /* use faster SSE2 aligned fetch and store */
768 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
770 p_line1 = p_line2;
771 p_line2 += p_dest->p->i_pitch;
773 p_y1 = p_y2;
774 p_y2 += p_source->p[Y_PLANE].i_pitch;
776 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
778 SSE2_CALL( SSE2_YUV420_UYVY_ALIGNED );
780 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
782 C_YUV420_UYVY( );
785 p_y1 += i_source_margin;
786 p_y2 += i_source_margin;
787 p_u += i_source_margin_c;
788 p_v += i_source_margin_c;
789 p_line1 += i_dest_margin;
790 p_line2 += i_dest_margin;
793 else
795 /* use slower SSE2 unaligned fetch and store */
796 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
798 p_line1 = p_line2;
799 p_line2 += p_dest->p->i_pitch;
801 p_y1 = p_y2;
802 p_y2 += p_source->p[Y_PLANE].i_pitch;
804 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
806 SSE2_CALL( SSE2_YUV420_UYVY_UNALIGNED );
808 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
810 C_YUV420_UYVY( );
813 p_y1 += i_source_margin;
814 p_y2 += i_source_margin;
815 p_u += i_source_margin_c;
816 p_v += i_source_margin_c;
817 p_line1 += i_dest_margin;
818 p_line2 += i_dest_margin;
821 /* make sure all SSE2 stores are visible thereafter */
822 SSE2_END;
823 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
826 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
827 /*****************************************************************************
828 * I420_IUYV: planar YUV 4:2:0 to interleaved packed UYVY 4:2:2
829 *****************************************************************************/
830 static void I420_IUYV( filter_t *p_filter, picture_t *p_source,
831 picture_t *p_dest )
833 VLC_UNUSED(p_source); VLC_UNUSED(p_dest);
834 /* FIXME: TODO ! */
835 msg_Err( p_filter, "I420_IUYV unimplemented, please harass <sam@zoy.org>" );
838 /*****************************************************************************
839 * I420_cyuv: planar YUV 4:2:0 to upside-down packed UYVY 4:2:2
840 *****************************************************************************/
841 static void I420_cyuv( filter_t *p_filter, picture_t *p_source,
842 picture_t *p_dest )
844 uint8_t *p_line1 = p_dest->p->p_pixels +
845 p_dest->p->i_visible_lines * p_dest->p->i_pitch
846 + p_dest->p->i_pitch;
847 uint8_t *p_line2 = p_dest->p->p_pixels +
848 p_dest->p->i_visible_lines * p_dest->p->i_pitch;
849 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
850 uint8_t *p_u = p_source->U_PIXELS;
851 uint8_t *p_v = p_source->V_PIXELS;
853 int i_x, i_y;
855 const int i_source_margin = p_source->p[0].i_pitch
856 - p_source->p[0].i_visible_pitch;
857 const int i_source_margin_c = p_source->p[1].i_pitch
858 - p_source->p[1].i_visible_pitch;
859 const int i_dest_margin = p_dest->p->i_pitch
860 - p_dest->p->i_visible_pitch;
862 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
863 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
865 p_line1 -= 3 * p_dest->p->i_pitch;
866 p_line2 -= 3 * p_dest->p->i_pitch;
868 p_y1 = p_y2;
869 p_y2 += p_source->p[Y_PLANE].i_pitch;
871 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
873 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
874 C_YUV420_UYVY( );
875 C_YUV420_UYVY( );
876 C_YUV420_UYVY( );
877 C_YUV420_UYVY( );
878 #else
879 MMX_CALL( MMX_YUV420_UYVY );
880 #endif
882 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
884 C_YUV420_UYVY( );
887 p_y1 += i_source_margin;
888 p_y2 += i_source_margin;
889 p_u += i_source_margin_c;
890 p_v += i_source_margin_c;
891 p_line1 += i_dest_margin;
892 p_line2 += i_dest_margin;
895 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
896 /* re-enable FPU registers */
897 MMX_END;
898 #endif
900 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
902 ** SSE2 128 bits fetch/store instructions are faster
903 ** if memory access is 16 bytes aligned
905 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
906 ((intptr_t)p_line2|(intptr_t)p_y2))) )
908 /* use faster SSE2 aligned fetch and store */
909 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
911 p_line1 = p_line2;
912 p_line2 += p_dest->p->i_pitch;
914 p_y1 = p_y2;
915 p_y2 += p_source->p[Y_PLANE].i_pitch;
917 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
919 SSE2_CALL( SSE2_YUV420_UYVY_ALIGNED );
921 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
923 C_YUV420_UYVY( );
926 p_y1 += i_source_margin;
927 p_y2 += i_source_margin;
928 p_u += i_source_margin_c;
929 p_v += i_source_margin_c;
930 p_line1 += i_dest_margin;
931 p_line2 += i_dest_margin;
934 else
936 /* use slower SSE2 unaligned fetch and store */
937 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
939 p_line1 = p_line2;
940 p_line2 += p_dest->p->i_pitch;
942 p_y1 = p_y2;
943 p_y2 += p_source->p[Y_PLANE].i_pitch;
945 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
947 SSE2_CALL( SSE2_YUV420_UYVY_UNALIGNED );
949 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
951 C_YUV420_UYVY( );
954 p_y1 += i_source_margin;
955 p_y2 += i_source_margin;
956 p_u += i_source_margin_c;
957 p_v += i_source_margin_c;
958 p_line1 += i_dest_margin;
959 p_line2 += i_dest_margin;
962 /* make sure all SSE2 stores are visible thereafter */
963 SSE2_END;
964 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
966 #endif // !defined (MODULE_NAME_IS_i420_yuy2_altivec)
968 /*****************************************************************************
969 * I420_Y211: planar YUV 4:2:0 to packed YUYV 2:1:1
970 *****************************************************************************/
971 #if defined (MODULE_NAME_IS_i420_yuy2)
972 static void I420_Y211( filter_t *p_filter, picture_t *p_source,
973 picture_t *p_dest )
975 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
976 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
977 uint8_t *p_u = p_source->U_PIXELS;
978 uint8_t *p_v = p_source->V_PIXELS;
980 int i_x, i_y;
982 const int i_source_margin = p_source->p[0].i_pitch
983 - p_source->p[0].i_visible_pitch;
984 const int i_source_margin_c = p_source->p[1].i_pitch
985 - p_source->p[1].i_visible_pitch;
986 const int i_dest_margin = p_dest->p->i_pitch
987 - p_dest->p->i_visible_pitch;
989 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
991 p_line1 = p_line2;
992 p_line2 += p_dest->p->i_pitch;
994 p_y1 = p_y2;
995 p_y2 += p_source->p[Y_PLANE].i_pitch;
997 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
999 C_YUV420_Y211( );
1000 C_YUV420_Y211( );
1003 p_y1 += i_source_margin;
1004 p_y2 += i_source_margin;
1005 p_u += i_source_margin_c;
1006 p_v += i_source_margin_c;
1007 p_line1 += i_dest_margin;
1008 p_line2 += i_dest_margin;
1011 #endif