Qt: do not show open options in both normal and advanced UI
[vlc.git] / modules / video_chroma / i420_yuy2.c
blob1d36b06f36a6930524d8f074a1ab2d80e4e6c519
1 /*****************************************************************************
2 * i420_yuy2.c : YUV to YUV conversion module for vlc
3 *****************************************************************************
4 * Copyright (C) 2000, 2001 the VideoLAN team
5 * $Id$
7 * Authors: Samuel Hocevar <sam@zoy.org>
8 * Damien Fouilleul <damien@videolan.org>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
23 *****************************************************************************/
25 /*****************************************************************************
26 * Preamble
27 *****************************************************************************/
29 #ifdef HAVE_CONFIG_H
30 # include "config.h"
31 #endif
33 #include <vlc_common.h>
34 #include <vlc_plugin.h>
35 #include <vlc_filter.h>
36 #include <vlc_cpu.h>
38 #if defined (MODULE_NAME_IS_i420_yuy2_altivec) && defined(HAVE_ALTIVEC_H)
39 # include <altivec.h>
40 #endif
42 #include "i420_yuy2.h"
/* Input chromas accepted by every variant of this module. */
#define SRC_FOURCC  "I420,IYUV,YV12"

/* Output chromas, per build variant; the MMX and SSE2 builds share a list. */
#if defined (MODULE_NAME_IS_i420_yuy2)
#    define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv,Y211"
#elif defined (MODULE_NAME_IS_i420_yuy2_mmx) \
   || defined (MODULE_NAME_IS_i420_yuy2_sse2)
#    define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422,IUYV,cyuv"
#elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
#    define DEST_FOURCC "YUY2,YUNV,YVYU,UYVY,UYNV,Y422"
#endif
56 /*****************************************************************************
57 * Local and extern prototypes.
58 *****************************************************************************/
59 static int Activate ( vlc_object_t * );
61 static void I420_YUY2 ( filter_t *, picture_t *, picture_t * );
62 static void I420_YVYU ( filter_t *, picture_t *, picture_t * );
63 static void I420_UYVY ( filter_t *, picture_t *, picture_t * );
64 static picture_t *I420_YUY2_Filter ( filter_t *, picture_t * );
65 static picture_t *I420_YVYU_Filter ( filter_t *, picture_t * );
66 static picture_t *I420_UYVY_Filter ( filter_t *, picture_t * );
67 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
68 static void I420_IUYV ( filter_t *, picture_t *, picture_t * );
69 static void I420_cyuv ( filter_t *, picture_t *, picture_t * );
70 static picture_t *I420_IUYV_Filter ( filter_t *, picture_t * );
71 static picture_t *I420_cyuv_Filter ( filter_t *, picture_t * );
72 #endif
73 #if defined (MODULE_NAME_IS_i420_yuy2)
74 static void I420_Y211 ( filter_t *, picture_t *, picture_t * );
75 static picture_t *I420_Y211_Filter ( filter_t *, picture_t * );
76 #endif
78 #ifdef MODULE_NAME_IS_i420_yuy2_mmx
79 /* Initialize MMX-specific constants */
80 static const uint64_t i_00ffw = 0x00ff00ff00ff00ffULL;
81 static const uint64_t i_80w = 0x0000000080808080ULL;
82 #endif
84 /*****************************************************************************
85 * Module descriptor.
86 *****************************************************************************/
87 vlc_module_begin ()
88 #if defined (MODULE_NAME_IS_i420_yuy2)
89 set_description( N_("Conversions from " SRC_FOURCC " to " DEST_FOURCC) )
90 set_capability( "video filter2", 80 )
91 # define CPU_CAPABILITY 0
92 #elif defined (MODULE_NAME_IS_i420_yuy2_mmx)
93 set_description( N_("MMX conversions from " SRC_FOURCC " to " DEST_FOURCC) )
94 set_capability( "video filter2", 160 )
95 # define CPU_CAPABILITY CPU_CAPABILITY_MMX
96 #elif defined (MODULE_NAME_IS_i420_yuy2_sse2)
97 set_description( N_("SSE2 conversions from " SRC_FOURCC " to " DEST_FOURCC) )
98 set_capability( "video filter2", 250 )
99 # define CPU_CAPABILITY CPU_CAPABILITY_SSE2
100 #elif defined (MODULE_NAME_IS_i420_yuy2_altivec)
101 set_description(
102 _("AltiVec conversions from " SRC_FOURCC " to " DEST_FOURCC) );
103 set_capability( "video filter2", 250 )
104 # define CPU_CAPABILITY CPU_CAPABILITY_ALTIVEC
105 #endif
106 set_callbacks( Activate, NULL )
107 vlc_module_end ()
109 /*****************************************************************************
110 * Activate: allocate a chroma function
111 *****************************************************************************
112 * This function allocates and initializes a chroma function
113 *****************************************************************************/
114 static int Activate( vlc_object_t *p_this )
116 filter_t *p_filter = (filter_t *)p_this;
118 #if CPU_CAPABILITY
119 if( !(vlc_CPU() & CPU_CAPABILITY) )
120 return VLC_EGENERIC;
121 #endif
122 if( p_filter->fmt_in.video.i_width & 1
123 || p_filter->fmt_in.video.i_height & 1 )
125 return -1;
128 if( p_filter->fmt_in.video.i_width != p_filter->fmt_out.video.i_width
129 || p_filter->fmt_in.video.i_height != p_filter->fmt_out.video.i_height )
130 return -1;
132 switch( p_filter->fmt_in.video.i_chroma )
134 case VLC_CODEC_YV12:
135 case VLC_CODEC_I420:
136 switch( p_filter->fmt_out.video.i_chroma )
138 case VLC_CODEC_YUYV:
139 p_filter->pf_video_filter = I420_YUY2_Filter;
140 break;
142 case VLC_CODEC_YVYU:
143 p_filter->pf_video_filter = I420_YVYU_Filter;
144 break;
146 case VLC_CODEC_UYVY:
147 p_filter->pf_video_filter = I420_UYVY_Filter;
148 break;
149 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
150 case VLC_FOURCC('I','U','Y','V'):
151 p_filter->pf_video_filter = I420_IUYV_Filter;
152 break;
154 case VLC_CODEC_CYUV:
155 p_filter->pf_video_filter = I420_cyuv_Filter;
156 break;
157 #endif
159 #if defined (MODULE_NAME_IS_i420_yuy2)
160 case VLC_CODEC_Y211:
161 p_filter->pf_video_filter = I420_Y211_Filter;
162 break;
163 #endif
165 default:
166 return -1;
168 break;
170 default:
171 return -1;
174 return 0;
#if 0
/* Debug helper (compiled out): returns the x86 time-stamp counter. */
static inline unsigned long long read_cycles(void)
{
    /* rdtsc leaves the low half in EAX and the high half in EDX. The "=A"
     * constraint only names that pair on 32-bit x86, so read both halves
     * explicitly to stay correct on x86-64 as well. */
    unsigned int i_lo, i_hi;
    __asm__ __volatile__("rdtsc" : "=a" (i_lo), "=d" (i_hi));

    return ((unsigned long long)i_hi << 32) | i_lo;
}
#endif
187 /* Following functions are local */
189 VIDEO_FILTER_WRAPPER( I420_YUY2 )
190 VIDEO_FILTER_WRAPPER( I420_YVYU )
191 VIDEO_FILTER_WRAPPER( I420_UYVY )
192 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
193 VIDEO_FILTER_WRAPPER( I420_IUYV )
194 VIDEO_FILTER_WRAPPER( I420_cyuv )
195 #endif
196 #if defined (MODULE_NAME_IS_i420_yuy2)
197 VIDEO_FILTER_WRAPPER( I420_Y211 )
198 #endif
200 /*****************************************************************************
201 * I420_YUY2: planar YUV 4:2:0 to packed YUYV 4:2:2
202 *****************************************************************************/
203 static void I420_YUY2( filter_t *p_filter, picture_t *p_source,
204 picture_t *p_dest )
206 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
207 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
208 uint8_t *p_u = p_source->U_PIXELS;
209 uint8_t *p_v = p_source->V_PIXELS;
211 int i_x, i_y;
213 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
214 #define VEC_NEXT_LINES( ) \
215 p_line1 = p_line2; \
216 p_line2 += p_dest->p->i_pitch; \
217 p_y1 = p_y2; \
218 p_y2 += p_source->p[Y_PLANE].i_pitch;
220 #define VEC_LOAD_UV( ) \
221 u_vec = vec_ld( 0, p_u ); p_u += 16; \
222 v_vec = vec_ld( 0, p_v ); p_v += 16;
224 #define VEC_MERGE( a ) \
225 uv_vec = a( u_vec, v_vec ); \
226 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
227 vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
228 vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line1 ); p_line1 += 16; \
229 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
230 vec_st( vec_mergeh( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16; \
231 vec_st( vec_mergel( y_vec, uv_vec ), 0, p_line2 ); p_line2 += 16;
233 vector unsigned char u_vec;
234 vector unsigned char v_vec;
235 vector unsigned char uv_vec;
236 vector unsigned char y_vec;
238 if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
239 ( p_filter->fmt_in.video.i_height % 2 ) ) )
241 /* Width is a multiple of 32, we take 2 lines at a time */
242 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
244 VEC_NEXT_LINES( );
245 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
247 VEC_LOAD_UV( );
248 VEC_MERGE( vec_mergeh );
249 VEC_MERGE( vec_mergel );
253 #warning FIXME: converting widths % 16 but !widths % 32 is broken on altivec
254 #if 0
255 else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
256 ( p_filter->fmt_in.video.i_height % 4 ) ) )
258 /* Width is only a multiple of 16, we take 4 lines at a time */
259 for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
261 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
262 VEC_NEXT_LINES( );
263 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
265 VEC_LOAD_UV( );
266 VEC_MERGE( vec_mergeh );
267 VEC_MERGE( vec_mergel );
270 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
271 VEC_LOAD_UV( );
272 VEC_MERGE( vec_mergeh );
274 /* Line 3 and 4, pixels 0 to 16 */
275 VEC_NEXT_LINES( );
276 VEC_MERGE( vec_mergel );
278 /* Line 3 and 4, pixels 16 to ( width ) */
279 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
281 VEC_LOAD_UV( );
282 VEC_MERGE( vec_mergeh );
283 VEC_MERGE( vec_mergel );
287 #endif
288 else
290 /* Crap, use the C version */
291 #undef VEC_NEXT_LINES
292 #undef VEC_LOAD_UV
293 #undef VEC_MERGE
294 #endif
296 const int i_source_margin = p_source->p[0].i_pitch
297 - p_source->p[0].i_visible_pitch;
298 const int i_source_margin_c = p_source->p[1].i_pitch
299 - p_source->p[1].i_visible_pitch;
300 const int i_dest_margin = p_dest->p->i_pitch
301 - p_dest->p->i_visible_pitch;
303 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
304 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
306 p_line1 = p_line2;
307 p_line2 += p_dest->p->i_pitch;
309 p_y1 = p_y2;
310 p_y2 += p_source->p[Y_PLANE].i_pitch;
312 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
313 for( i_x = p_filter->fmt_in.video.i_width / 8; i_x-- ; )
315 C_YUV420_YUYV( );
316 C_YUV420_YUYV( );
317 C_YUV420_YUYV( );
318 C_YUV420_YUYV( );
320 #else
321 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
323 MMX_CALL( MMX_YUV420_YUYV );
325 #endif
326 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
328 C_YUV420_YUYV( );
331 p_y1 += i_source_margin;
332 p_y2 += i_source_margin;
333 p_u += i_source_margin_c;
334 p_v += i_source_margin_c;
335 p_line1 += i_dest_margin;
336 p_line2 += i_dest_margin;
339 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
340 /* re-enable FPU registers */
341 MMX_END;
342 #endif
344 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
346 #endif
348 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
350 ** SSE2 128 bits fetch/store instructions are faster
351 ** if memory access is 16 bytes aligned
354 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
355 ((intptr_t)p_line2|(intptr_t)p_y2))) )
357 /* use faster SSE2 aligned fetch and store */
358 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
360 p_line1 = p_line2;
361 p_line2 += p_dest->p->i_pitch;
363 p_y1 = p_y2;
364 p_y2 += p_source->p[Y_PLANE].i_pitch;
366 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
368 SSE2_CALL( SSE2_YUV420_YUYV_ALIGNED );
370 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
372 C_YUV420_YUYV( );
375 p_y1 += i_source_margin;
376 p_y2 += i_source_margin;
377 p_u += i_source_margin_c;
378 p_v += i_source_margin_c;
379 p_line1 += i_dest_margin;
380 p_line2 += i_dest_margin;
383 else
385 /* use slower SSE2 unaligned fetch and store */
386 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
388 p_line1 = p_line2;
389 p_line2 += p_dest->p->i_pitch;
391 p_y1 = p_y2;
392 p_y2 += p_source->p[Y_PLANE].i_pitch;
394 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
396 SSE2_CALL( SSE2_YUV420_YUYV_UNALIGNED );
398 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
400 C_YUV420_YUYV( );
403 p_y1 += i_source_margin;
404 p_y2 += i_source_margin;
405 p_u += i_source_margin_c;
406 p_v += i_source_margin_c;
407 p_line1 += i_dest_margin;
408 p_line2 += i_dest_margin;
411 /* make sure all SSE2 stores are visible thereafter */
412 SSE2_END;
414 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
417 /*****************************************************************************
418 * I420_YVYU: planar YUV 4:2:0 to packed YVYU 4:2:2
419 *****************************************************************************/
420 static void I420_YVYU( filter_t *p_filter, picture_t *p_source,
421 picture_t *p_dest )
423 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
424 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
425 uint8_t *p_u = p_source->U_PIXELS;
426 uint8_t *p_v = p_source->V_PIXELS;
428 int i_x, i_y;
430 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
431 #define VEC_NEXT_LINES( ) \
432 p_line1 = p_line2; \
433 p_line2 += p_dest->p->i_pitch; \
434 p_y1 = p_y2; \
435 p_y2 += p_source->p[Y_PLANE].i_pitch;
437 #define VEC_LOAD_UV( ) \
438 u_vec = vec_ld( 0, p_u ); p_u += 16; \
439 v_vec = vec_ld( 0, p_v ); p_v += 16;
441 #define VEC_MERGE( a ) \
442 vu_vec = a( v_vec, u_vec ); \
443 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
444 vec_st( vec_mergeh( y_vec, vu_vec ), 0, p_line1 ); p_line1 += 16; \
445 vec_st( vec_mergel( y_vec, vu_vec ), 0, p_line1 ); p_line1 += 16; \
446 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
447 vec_st( vec_mergeh( y_vec, vu_vec ), 0, p_line2 ); p_line2 += 16; \
448 vec_st( vec_mergel( y_vec, vu_vec ), 0, p_line2 ); p_line2 += 16;
450 vector unsigned char u_vec;
451 vector unsigned char v_vec;
452 vector unsigned char vu_vec;
453 vector unsigned char y_vec;
455 if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
456 ( p_filter->fmt_in.video.i_height % 2 ) ) )
458 /* Width is a multiple of 32, we take 2 lines at a time */
459 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
461 VEC_NEXT_LINES( );
462 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
464 VEC_LOAD_UV( );
465 VEC_MERGE( vec_mergeh );
466 VEC_MERGE( vec_mergel );
470 else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
471 ( p_filter->fmt_in.video.i_height % 4 ) ) )
473 /* Width is only a multiple of 16, we take 4 lines at a time */
474 for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
476 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
477 VEC_NEXT_LINES( );
478 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
480 VEC_LOAD_UV( );
481 VEC_MERGE( vec_mergeh );
482 VEC_MERGE( vec_mergel );
485 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
486 VEC_LOAD_UV( );
487 VEC_MERGE( vec_mergeh );
489 /* Line 3 and 4, pixels 0 to 16 */
490 VEC_NEXT_LINES( );
491 VEC_MERGE( vec_mergel );
493 /* Line 3 and 4, pixels 16 to ( width ) */
494 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
496 VEC_LOAD_UV( );
497 VEC_MERGE( vec_mergeh );
498 VEC_MERGE( vec_mergel );
502 else
504 /* Crap, use the C version */
505 #undef VEC_NEXT_LINES
506 #undef VEC_LOAD_UV
507 #undef VEC_MERGE
508 #endif
510 const int i_source_margin = p_source->p[0].i_pitch
511 - p_source->p[0].i_visible_pitch;
512 const int i_source_margin_c = p_source->p[1].i_pitch
513 - p_source->p[1].i_visible_pitch;
514 const int i_dest_margin = p_dest->p->i_pitch
515 - p_dest->p->i_visible_pitch;
517 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
518 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
520 p_line1 = p_line2;
521 p_line2 += p_dest->p->i_pitch;
523 p_y1 = p_y2;
524 p_y2 += p_source->p[Y_PLANE].i_pitch;
526 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
528 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
529 C_YUV420_YVYU( );
530 C_YUV420_YVYU( );
531 C_YUV420_YVYU( );
532 C_YUV420_YVYU( );
533 #else
534 MMX_CALL( MMX_YUV420_YVYU );
535 #endif
537 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
539 C_YUV420_YVYU( );
542 p_y1 += i_source_margin;
543 p_y2 += i_source_margin;
544 p_u += i_source_margin_c;
545 p_v += i_source_margin_c;
546 p_line1 += i_dest_margin;
547 p_line2 += i_dest_margin;
550 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
551 /* re-enable FPU registers */
552 MMX_END;
553 #endif
555 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
557 #endif
559 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
561 ** SSE2 128 bits fetch/store instructions are faster
562 ** if memory access is 16 bytes aligned
564 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
565 ((intptr_t)p_line2|(intptr_t)p_y2))) )
567 /* use faster SSE2 aligned fetch and store */
568 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
570 p_line1 = p_line2;
571 p_line2 += p_dest->p->i_pitch;
573 p_y1 = p_y2;
574 p_y2 += p_source->p[Y_PLANE].i_pitch;
576 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
578 SSE2_CALL( SSE2_YUV420_YVYU_ALIGNED );
580 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
582 C_YUV420_YVYU( );
585 p_y1 += i_source_margin;
586 p_y2 += i_source_margin;
587 p_u += i_source_margin_c;
588 p_v += i_source_margin_c;
589 p_line1 += i_dest_margin;
590 p_line2 += i_dest_margin;
593 else
595 /* use slower SSE2 unaligned fetch and store */
596 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
598 p_line1 = p_line2;
599 p_line2 += p_dest->p->i_pitch;
601 p_y1 = p_y2;
602 p_y2 += p_source->p[Y_PLANE].i_pitch;
604 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
606 SSE2_CALL( SSE2_YUV420_YVYU_UNALIGNED );
608 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
610 C_YUV420_YVYU( );
613 p_y1 += i_source_margin;
614 p_y2 += i_source_margin;
615 p_u += i_source_margin_c;
616 p_v += i_source_margin_c;
617 p_line1 += i_dest_margin;
618 p_line2 += i_dest_margin;
621 /* make sure all SSE2 stores are visible thereafter */
622 SSE2_END;
623 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
626 /*****************************************************************************
627 * I420_UYVY: planar YUV 4:2:0 to packed UYVY 4:2:2
628 *****************************************************************************/
629 static void I420_UYVY( filter_t *p_filter, picture_t *p_source,
630 picture_t *p_dest )
632 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
633 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
634 uint8_t *p_u = p_source->U_PIXELS;
635 uint8_t *p_v = p_source->V_PIXELS;
637 int i_x, i_y;
639 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
640 #define VEC_NEXT_LINES( ) \
641 p_line1 = p_line2; \
642 p_line2 += p_dest->p->i_pitch; \
643 p_y1 = p_y2; \
644 p_y2 += p_source->p[Y_PLANE].i_pitch;
646 #define VEC_LOAD_UV( ) \
647 u_vec = vec_ld( 0, p_u ); p_u += 16; \
648 v_vec = vec_ld( 0, p_v ); p_v += 16;
650 #define VEC_MERGE( a ) \
651 uv_vec = a( u_vec, v_vec ); \
652 y_vec = vec_ld( 0, p_y1 ); p_y1 += 16; \
653 vec_st( vec_mergeh( uv_vec, y_vec ), 0, p_line1 ); p_line1 += 16; \
654 vec_st( vec_mergel( uv_vec, y_vec ), 0, p_line1 ); p_line1 += 16; \
655 y_vec = vec_ld( 0, p_y2 ); p_y2 += 16; \
656 vec_st( vec_mergeh( uv_vec, y_vec ), 0, p_line2 ); p_line2 += 16; \
657 vec_st( vec_mergel( uv_vec, y_vec ), 0, p_line2 ); p_line2 += 16;
659 vector unsigned char u_vec;
660 vector unsigned char v_vec;
661 vector unsigned char uv_vec;
662 vector unsigned char y_vec;
664 if( !( ( p_filter->fmt_in.video.i_width % 32 ) |
665 ( p_filter->fmt_in.video.i_height % 2 ) ) )
667 /* Width is a multiple of 32, we take 2 lines at a time */
668 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
670 VEC_NEXT_LINES( );
671 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
673 VEC_LOAD_UV( );
674 VEC_MERGE( vec_mergeh );
675 VEC_MERGE( vec_mergel );
679 else if( !( ( p_filter->fmt_in.video.i_width % 16 ) |
680 ( p_filter->fmt_in.video.i_height % 4 ) ) )
682 /* Width is only a multiple of 16, we take 4 lines at a time */
683 for( i_y = p_filter->fmt_in.video.i_height / 4 ; i_y-- ; )
685 /* Line 1 and 2, pixels 0 to ( width - 16 ) */
686 VEC_NEXT_LINES( );
687 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
689 VEC_LOAD_UV( );
690 VEC_MERGE( vec_mergeh );
691 VEC_MERGE( vec_mergel );
694 /* Line 1 and 2, pixels ( width - 16 ) to ( width ) */
695 VEC_LOAD_UV( );
696 VEC_MERGE( vec_mergeh );
698 /* Line 3 and 4, pixels 0 to 16 */
699 VEC_NEXT_LINES( );
700 VEC_MERGE( vec_mergel );
702 /* Line 3 and 4, pixels 16 to ( width ) */
703 for( i_x = p_filter->fmt_in.video.i_width / 32 ; i_x-- ; )
705 VEC_LOAD_UV( );
706 VEC_MERGE( vec_mergeh );
707 VEC_MERGE( vec_mergel );
711 else
713 /* Crap, use the C version */
714 #undef VEC_NEXT_LINES
715 #undef VEC_LOAD_UV
716 #undef VEC_MERGE
717 #endif
719 const int i_source_margin = p_source->p[0].i_pitch
720 - p_source->p[0].i_visible_pitch;
721 const int i_source_margin_c = p_source->p[1].i_pitch
722 - p_source->p[1].i_visible_pitch;
723 const int i_dest_margin = p_dest->p->i_pitch
724 - p_dest->p->i_visible_pitch;
726 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
727 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
729 p_line1 = p_line2;
730 p_line2 += p_dest->p->i_pitch;
732 p_y1 = p_y2;
733 p_y2 += p_source->p[Y_PLANE].i_pitch;
735 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
737 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
738 C_YUV420_UYVY( );
739 C_YUV420_UYVY( );
740 C_YUV420_UYVY( );
741 C_YUV420_UYVY( );
742 #else
743 MMX_CALL( MMX_YUV420_UYVY );
744 #endif
746 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x--; )
748 C_YUV420_UYVY( );
751 p_y1 += i_source_margin;
752 p_y2 += i_source_margin;
753 p_u += i_source_margin_c;
754 p_v += i_source_margin_c;
755 p_line1 += i_dest_margin;
756 p_line2 += i_dest_margin;
759 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
760 /* re-enable FPU registers */
761 MMX_END;
762 #endif
764 #if defined (MODULE_NAME_IS_i420_yuy2_altivec)
766 #endif
768 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
770 ** SSE2 128 bits fetch/store instructions are faster
771 ** if memory access is 16 bytes aligned
773 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
774 ((intptr_t)p_line2|(intptr_t)p_y2))) )
776 /* use faster SSE2 aligned fetch and store */
777 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
779 p_line1 = p_line2;
780 p_line2 += p_dest->p->i_pitch;
782 p_y1 = p_y2;
783 p_y2 += p_source->p[Y_PLANE].i_pitch;
785 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
787 SSE2_CALL( SSE2_YUV420_UYVY_ALIGNED );
789 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
791 C_YUV420_UYVY( );
794 p_y1 += i_source_margin;
795 p_y2 += i_source_margin;
796 p_u += i_source_margin_c;
797 p_v += i_source_margin_c;
798 p_line1 += i_dest_margin;
799 p_line2 += i_dest_margin;
802 else
804 /* use slower SSE2 unaligned fetch and store */
805 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
807 p_line1 = p_line2;
808 p_line2 += p_dest->p->i_pitch;
810 p_y1 = p_y2;
811 p_y2 += p_source->p[Y_PLANE].i_pitch;
813 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
815 SSE2_CALL( SSE2_YUV420_UYVY_UNALIGNED );
817 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
819 C_YUV420_UYVY( );
822 p_y1 += i_source_margin;
823 p_y2 += i_source_margin;
824 p_u += i_source_margin_c;
825 p_v += i_source_margin_c;
826 p_line1 += i_dest_margin;
827 p_line2 += i_dest_margin;
830 /* make sure all SSE2 stores are visible thereafter */
831 SSE2_END;
832 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
835 #if !defined (MODULE_NAME_IS_i420_yuy2_altivec)
836 /*****************************************************************************
837 * I420_IUYV: planar YUV 4:2:0 to interleaved packed UYVY 4:2:2
838 *****************************************************************************/
839 static void I420_IUYV( filter_t *p_filter, picture_t *p_source,
840 picture_t *p_dest )
842 VLC_UNUSED(p_source); VLC_UNUSED(p_dest);
843 /* FIXME: TODO ! */
844 msg_Err( p_filter, "I420_IUYV unimplemented, please harass <sam@zoy.org>" );
847 /*****************************************************************************
848 * I420_cyuv: planar YUV 4:2:0 to upside-down packed UYVY 4:2:2
849 *****************************************************************************/
850 static void I420_cyuv( filter_t *p_filter, picture_t *p_source,
851 picture_t *p_dest )
853 uint8_t *p_line1 = p_dest->p->p_pixels +
854 p_dest->p->i_visible_lines * p_dest->p->i_pitch
855 + p_dest->p->i_pitch;
856 uint8_t *p_line2 = p_dest->p->p_pixels +
857 p_dest->p->i_visible_lines * p_dest->p->i_pitch;
858 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
859 uint8_t *p_u = p_source->U_PIXELS;
860 uint8_t *p_v = p_source->V_PIXELS;
862 int i_x, i_y;
864 const int i_source_margin = p_source->p[0].i_pitch
865 - p_source->p[0].i_visible_pitch;
866 const int i_source_margin_c = p_source->p[1].i_pitch
867 - p_source->p[1].i_visible_pitch;
868 const int i_dest_margin = p_dest->p->i_pitch
869 - p_dest->p->i_visible_pitch;
871 #if !defined(MODULE_NAME_IS_i420_yuy2_sse2)
872 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
874 p_line1 -= 3 * p_dest->p->i_pitch;
875 p_line2 -= 3 * p_dest->p->i_pitch;
877 p_y1 = p_y2;
878 p_y2 += p_source->p[Y_PLANE].i_pitch;
880 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
882 #if !defined (MODULE_NAME_IS_i420_yuy2_mmx)
883 C_YUV420_UYVY( );
884 C_YUV420_UYVY( );
885 C_YUV420_UYVY( );
886 C_YUV420_UYVY( );
887 #else
888 MMX_CALL( MMX_YUV420_UYVY );
889 #endif
891 for( i_x = ( p_filter->fmt_in.video.i_width % 8 ) / 2; i_x-- ; )
893 C_YUV420_UYVY( );
896 p_y1 += i_source_margin;
897 p_y2 += i_source_margin;
898 p_u += i_source_margin_c;
899 p_v += i_source_margin_c;
900 p_line1 += i_dest_margin;
901 p_line2 += i_dest_margin;
904 #if defined (MODULE_NAME_IS_i420_yuy2_mmx)
905 /* re-enable FPU registers */
906 MMX_END;
907 #endif
909 #else // defined(MODULE_NAME_IS_i420_yuy2_sse2)
911 ** SSE2 128 bits fetch/store instructions are faster
912 ** if memory access is 16 bytes aligned
914 if( 0 == (15 & (p_source->p[Y_PLANE].i_pitch|p_dest->p->i_pitch|
915 ((intptr_t)p_line2|(intptr_t)p_y2))) )
917 /* use faster SSE2 aligned fetch and store */
918 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
920 p_line1 = p_line2;
921 p_line2 += p_dest->p->i_pitch;
923 p_y1 = p_y2;
924 p_y2 += p_source->p[Y_PLANE].i_pitch;
926 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
928 SSE2_CALL( SSE2_YUV420_UYVY_ALIGNED );
930 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
932 C_YUV420_UYVY( );
935 p_y1 += i_source_margin;
936 p_y2 += i_source_margin;
937 p_u += i_source_margin_c;
938 p_v += i_source_margin_c;
939 p_line1 += i_dest_margin;
940 p_line2 += i_dest_margin;
943 else
945 /* use slower SSE2 unaligned fetch and store */
946 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
948 p_line1 = p_line2;
949 p_line2 += p_dest->p->i_pitch;
951 p_y1 = p_y2;
952 p_y2 += p_source->p[Y_PLANE].i_pitch;
954 for( i_x = p_filter->fmt_in.video.i_width / 16 ; i_x-- ; )
956 SSE2_CALL( SSE2_YUV420_UYVY_UNALIGNED );
958 for( i_x = ( p_filter->fmt_in.video.i_width % 16 ) / 2; i_x-- ; )
960 C_YUV420_UYVY( );
963 p_y1 += i_source_margin;
964 p_y2 += i_source_margin;
965 p_u += i_source_margin_c;
966 p_v += i_source_margin_c;
967 p_line1 += i_dest_margin;
968 p_line2 += i_dest_margin;
971 /* make sure all SSE2 stores are visible thereafter */
972 SSE2_END;
973 #endif // defined(MODULE_NAME_IS_i420_yuy2_sse2)
975 #endif // !defined (MODULE_NAME_IS_i420_yuy2_altivec)
977 /*****************************************************************************
978 * I420_Y211: planar YUV 4:2:0 to packed YUYV 2:1:1
979 *****************************************************************************/
980 #if defined (MODULE_NAME_IS_i420_yuy2)
981 static void I420_Y211( filter_t *p_filter, picture_t *p_source,
982 picture_t *p_dest )
984 uint8_t *p_line1, *p_line2 = p_dest->p->p_pixels;
985 uint8_t *p_y1, *p_y2 = p_source->Y_PIXELS;
986 uint8_t *p_u = p_source->U_PIXELS;
987 uint8_t *p_v = p_source->V_PIXELS;
989 int i_x, i_y;
991 const int i_source_margin = p_source->p[0].i_pitch
992 - p_source->p[0].i_visible_pitch;
993 const int i_source_margin_c = p_source->p[1].i_pitch
994 - p_source->p[1].i_visible_pitch;
995 const int i_dest_margin = p_dest->p->i_pitch
996 - p_dest->p->i_visible_pitch;
998 for( i_y = p_filter->fmt_in.video.i_height / 2 ; i_y-- ; )
1000 p_line1 = p_line2;
1001 p_line2 += p_dest->p->i_pitch;
1003 p_y1 = p_y2;
1004 p_y2 += p_source->p[Y_PLANE].i_pitch;
1006 for( i_x = p_filter->fmt_in.video.i_width / 8 ; i_x-- ; )
1008 C_YUV420_Y211( );
1009 C_YUV420_Y211( );
1012 p_y1 += i_source_margin;
1013 p_y2 += i_source_margin;
1014 p_u += i_source_margin_c;
1015 p_v += i_source_margin_c;
1016 p_line1 += i_dest_margin;
1017 p_line2 += i_dest_margin;
1020 #endif