1 /*****************************************************************************
2 * i420_rgb16_x86.c : YUV to bitmap RGB conversion module for vlc
3 *****************************************************************************
4 * Copyright (C) 2000 VLC authors and VideoLAN
6 * Authors: Samuel Hocevar <sam@zoy.org>
7 * Damien Fouilleul <damienf@videolan.org>
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU Lesser General Public License as published by
11 * the Free Software Foundation; either version 2.1 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public License
20 * along with this program; if not, write to the Free Software Foundation,
21 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
22 *****************************************************************************/
28 #include <vlc_common.h>
29 #include <vlc_filter.h>
30 #include <vlc_picture.h>
35 # include "i420_rgb_sse2.h"
36 # define VLC_TARGET VLC_SSE
38 # include "i420_rgb_mmx.h"
39 # define VLC_TARGET VLC_MMX
42 /*****************************************************************************
43 * SetOffset: build offset array for conversion functions
44 *****************************************************************************
45 * This function will build an offset array used in later conversion functions.
46 * It will also set horizontal and vertical scaling indicators.
47 *****************************************************************************/
48 static void SetOffset( int i_width
, int i_height
, int i_pic_width
,
49 int i_pic_height
, bool *pb_hscale
,
50 unsigned int *pi_vscale
, int *p_offset
)
53 * Prepare horizontal offset array
55 if( i_pic_width
- i_width
== 0 )
56 { /* No horizontal scaling: YUV conversion is done directly to picture */
59 else if( i_pic_width
- i_width
> 0 )
60 { /* Prepare scaling array for horizontal extension */
61 int i_scale_count
= i_pic_width
;
64 for( int i_x
= i_width
; i_x
--; )
66 while( (i_scale_count
-= i_width
) > 0 )
71 i_scale_count
+= i_pic_width
;
74 else /* if( i_pic_width - i_width < 0 ) */
75 { /* Prepare scaling array for horizontal reduction */
76 int i_scale_count
= i_pic_width
;
79 for( int i_x
= i_pic_width
; i_x
--; )
82 while( (i_scale_count
-= i_pic_width
) > 0 )
87 i_scale_count
+= i_width
;
92 * Set vertical scaling indicator
94 if( i_pic_height
- i_height
== 0 )
96 else if( i_pic_height
- i_height
> 0 )
98 else /* if( i_pic_height - i_height < 0 ) */
103 void I420_R5G5B5( filter_t
*p_filter
, picture_t
*p_src
, picture_t
*p_dest
)
105 filter_sys_t
*p_sys
= p_filter
->p_sys
;
107 /* We got this one from the old arguments */
108 uint16_t *p_pic
= (uint16_t*)p_dest
->p
->p_pixels
;
109 uint8_t *p_y
= p_src
->Y_PIXELS
;
110 uint8_t *p_u
= p_src
->U_PIXELS
;
111 uint8_t *p_v
= p_src
->V_PIXELS
;
113 bool b_hscale
; /* horizontal scaling type */
114 unsigned int i_vscale
; /* vertical scaling type */
115 unsigned int i_x
, i_y
; /* horizontal and vertical indexes */
119 int i_scale_count
; /* scale modulo counter */
120 int i_chroma_width
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 2; /* chroma width */
121 uint16_t * p_pic_start
; /* beginning of the current line for copy */
123 /* Conversion buffer pointer */
124 uint16_t * p_buffer_start
;
127 /* Offset array pointer */
128 int * p_offset_start
= p_sys
->p_offset
;
131 const int i_source_margin
= p_src
->p
[0].i_pitch
132 - p_src
->p
[0].i_visible_pitch
133 - p_filter
->fmt_in
.video
.i_x_offset
;
134 const int i_source_margin_c
= p_src
->p
[1].i_pitch
135 - p_src
->p
[1].i_visible_pitch
136 - ( p_filter
->fmt_in
.video
.i_x_offset
/ 2 );
138 i_right_margin
= p_dest
->p
->i_pitch
- p_dest
->p
->i_visible_pitch
;
140 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
141 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
142 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
143 SetOffset( (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
),
144 (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
),
145 (p_filter
->fmt_out
.video
.i_x_offset
+ p_filter
->fmt_out
.video
.i_visible_width
),
146 (p_filter
->fmt_out
.video
.i_y_offset
+ p_filter
->fmt_out
.video
.i_visible_height
),
147 &b_hscale
, &i_vscale
, p_offset_start
);
150 AllocateOrGrow(&p_sys
->p_buffer
, &p_sys
->i_buffer_size
,
151 p_filter
->fmt_in
.video
.i_x_offset
+
152 p_filter
->fmt_in
.video
.i_visible_width
,
155 else p_buffer_start
= (uint16_t*)p_sys
->p_buffer
;
160 i_scale_count
= ( i_vscale
== 1 ) ?
161 (p_filter
->fmt_out
.video
.i_y_offset
+ p_filter
->fmt_out
.video
.i_visible_height
) :
162 (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
);
166 i_rewind
= (-(p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)) & 15;
169 ** SSE2 128 bits fetch/store instructions are faster
170 ** if memory access is 16 bytes aligned
173 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
175 if( 0 == (15 & (p_src
->p
[Y_PLANE
].i_pitch
|
178 ((intptr_t)p_buffer
))) )
180 /* use faster SSE2 aligned fetch and store */
181 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
185 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)/16; i_x
--; )
191 SSE2_UNPACK_15_ALIGNED
198 /* Here we do some unaligned reads and duplicate conversions, but
199 * at least we have all the pixels */
203 p_u
-= i_rewind
>> 1;
204 p_v
-= i_rewind
>> 1;
205 p_buffer
-= i_rewind
;
208 SSE2_INIT_16_UNALIGNED
211 SSE2_UNPACK_15_UNALIGNED
218 SCALE_HEIGHT( 420, 2 );
220 p_y
+= i_source_margin
;
223 p_u
+= i_source_margin_c
;
224 p_v
+= i_source_margin_c
;
226 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
231 /* use slower SSE2 unaligned fetch and store */
232 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
236 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)/16; i_x
--; )
239 SSE2_INIT_16_UNALIGNED
242 SSE2_UNPACK_15_UNALIGNED
249 /* Here we do some unaligned reads and duplicate conversions, but
250 * at least we have all the pixels */
254 p_u
-= i_rewind
>> 1;
255 p_v
-= i_rewind
>> 1;
256 p_buffer
-= i_rewind
;
259 SSE2_INIT_16_UNALIGNED
262 SSE2_UNPACK_15_UNALIGNED
269 SCALE_HEIGHT( 420, 2 );
271 p_y
+= i_source_margin
;
274 p_u
+= i_source_margin_c
;
275 p_v
+= i_source_margin_c
;
277 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
281 /* make sure all SSE2 stores are visible thereafter */
286 i_rewind
= (-(p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)) & 7;
288 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
291 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
293 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 8; i_x
--; )
307 /* Here we do some unaligned reads and duplicate conversions, but
308 * at least we have all the pixels */
312 p_u
-= i_rewind
>> 1;
313 p_v
-= i_rewind
>> 1;
314 p_buffer
-= i_rewind
;
327 SCALE_HEIGHT( 420, 2 );
329 p_y
+= i_source_margin
;
332 p_u
+= i_source_margin_c
;
333 p_v
+= i_source_margin_c
;
336 /* re-enable FPU registers */
343 void I420_R5G6B5( filter_t
*p_filter
, picture_t
*p_src
, picture_t
*p_dest
)
345 filter_sys_t
*p_sys
= p_filter
->p_sys
;
347 /* We got this one from the old arguments */
348 uint16_t *p_pic
= (uint16_t*)p_dest
->p
->p_pixels
;
349 uint8_t *p_y
= p_src
->Y_PIXELS
;
350 uint8_t *p_u
= p_src
->U_PIXELS
;
351 uint8_t *p_v
= p_src
->V_PIXELS
;
353 bool b_hscale
; /* horizontal scaling type */
354 unsigned int i_vscale
; /* vertical scaling type */
355 unsigned int i_x
, i_y
; /* horizontal and vertical indexes */
359 int i_scale_count
; /* scale modulo counter */
360 int i_chroma_width
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 2; /* chroma width */
361 uint16_t * p_pic_start
; /* beginning of the current line for copy */
363 /* Conversion buffer pointer */
364 uint16_t * p_buffer_start
;
367 /* Offset array pointer */
368 int * p_offset_start
= p_sys
->p_offset
;
371 const int i_source_margin
= p_src
->p
[0].i_pitch
372 - p_src
->p
[0].i_visible_pitch
373 - p_filter
->fmt_in
.video
.i_x_offset
;
374 const int i_source_margin_c
= p_src
->p
[1].i_pitch
375 - p_src
->p
[1].i_visible_pitch
376 - ( p_filter
->fmt_in
.video
.i_x_offset
/ 2 );
378 i_right_margin
= p_dest
->p
->i_pitch
- p_dest
->p
->i_visible_pitch
;
380 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
381 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
382 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
383 SetOffset( (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
),
384 (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
),
385 (p_filter
->fmt_out
.video
.i_x_offset
+ p_filter
->fmt_out
.video
.i_visible_width
),
386 (p_filter
->fmt_out
.video
.i_y_offset
+ p_filter
->fmt_out
.video
.i_visible_height
),
387 &b_hscale
, &i_vscale
, p_offset_start
);
390 AllocateOrGrow(&p_sys
->p_buffer
, &p_sys
->i_buffer_size
,
391 p_filter
->fmt_in
.video
.i_x_offset
+
392 p_filter
->fmt_in
.video
.i_visible_width
,
395 else p_buffer_start
= (uint16_t*)p_sys
->p_buffer
;
400 i_scale_count
= ( i_vscale
== 1 ) ?
401 (p_filter
->fmt_out
.video
.i_y_offset
+ p_filter
->fmt_out
.video
.i_visible_height
) :
402 (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
);
406 i_rewind
= (-(p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)) & 15;
409 ** SSE2 128 bits fetch/store instructions are faster
410 ** if memory access is 16 bytes aligned
413 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
415 if( 0 == (15 & (p_src
->p
[Y_PLANE
].i_pitch
|
418 ((intptr_t)p_buffer
))) )
420 /* use faster SSE2 aligned fetch and store */
421 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
425 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)/16; i_x
--; )
431 SSE2_UNPACK_16_ALIGNED
438 /* Here we do some unaligned reads and duplicate conversions, but
439 * at least we have all the pixels */
443 p_u
-= i_rewind
>> 1;
444 p_v
-= i_rewind
>> 1;
445 p_buffer
-= i_rewind
;
448 SSE2_INIT_16_UNALIGNED
451 SSE2_UNPACK_16_UNALIGNED
458 SCALE_HEIGHT( 420, 2 );
460 p_y
+= i_source_margin
;
463 p_u
+= i_source_margin_c
;
464 p_v
+= i_source_margin_c
;
466 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
471 /* use slower SSE2 unaligned fetch and store */
472 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
476 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)/16; i_x
--; )
479 SSE2_INIT_16_UNALIGNED
482 SSE2_UNPACK_16_UNALIGNED
489 /* Here we do some unaligned reads and duplicate conversions, but
490 * at least we have all the pixels */
494 p_u
-= i_rewind
>> 1;
495 p_v
-= i_rewind
>> 1;
496 p_buffer
-= i_rewind
;
499 SSE2_INIT_16_UNALIGNED
502 SSE2_UNPACK_16_UNALIGNED
509 SCALE_HEIGHT( 420, 2 );
511 p_y
+= i_source_margin
;
514 p_u
+= i_source_margin_c
;
515 p_v
+= i_source_margin_c
;
517 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
521 /* make sure all SSE2 stores are visible thereafter */
526 i_rewind
= (-(p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)) & 7;
528 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
531 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
533 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 8; i_x
--; )
547 /* Here we do some unaligned reads and duplicate conversions, but
548 * at least we have all the pixels */
552 p_u
-= i_rewind
>> 1;
553 p_v
-= i_rewind
>> 1;
554 p_buffer
-= i_rewind
;
567 SCALE_HEIGHT( 420, 2 );
569 p_y
+= i_source_margin
;
572 p_u
+= i_source_margin_c
;
573 p_v
+= i_source_margin_c
;
576 /* re-enable FPU registers */
583 void I420_A8R8G8B8( filter_t
*p_filter
, picture_t
*p_src
,
586 filter_sys_t
*p_sys
= p_filter
->p_sys
;
588 /* We got this one from the old arguments */
589 uint32_t *p_pic
= (uint32_t*)p_dest
->p
->p_pixels
;
590 uint8_t *p_y
= p_src
->Y_PIXELS
;
591 uint8_t *p_u
= p_src
->U_PIXELS
;
592 uint8_t *p_v
= p_src
->V_PIXELS
;
594 bool b_hscale
; /* horizontal scaling type */
595 unsigned int i_vscale
; /* vertical scaling type */
596 unsigned int i_x
, i_y
; /* horizontal and vertical indexes */
600 int i_scale_count
; /* scale modulo counter */
601 int i_chroma_width
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 2; /* chroma width */
602 uint32_t * p_pic_start
; /* beginning of the current line for copy */
603 /* Conversion buffer pointer */
604 uint32_t * p_buffer_start
;
607 /* Offset array pointer */
608 int * p_offset_start
= p_sys
->p_offset
;
611 const int i_source_margin
= p_src
->p
[0].i_pitch
612 - p_src
->p
[0].i_visible_pitch
613 - p_filter
->fmt_in
.video
.i_x_offset
;
614 const int i_source_margin_c
= p_src
->p
[1].i_pitch
615 - p_src
->p
[1].i_visible_pitch
616 - ( p_filter
->fmt_in
.video
.i_x_offset
/ 2 );
618 i_right_margin
= p_dest
->p
->i_pitch
- p_dest
->p
->i_visible_pitch
;
620 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
621 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
622 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
623 SetOffset( p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
,
624 p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
,
625 (p_filter
->fmt_out
.video
.i_x_offset
+ p_filter
->fmt_out
.video
.i_visible_width
),
626 (p_filter
->fmt_out
.video
.i_y_offset
+ p_filter
->fmt_out
.video
.i_visible_height
),
627 &b_hscale
, &i_vscale
, p_offset_start
);
630 AllocateOrGrow(&p_sys
->p_buffer
, &p_sys
->i_buffer_size
,
631 p_filter
->fmt_in
.video
.i_x_offset
+
632 p_filter
->fmt_in
.video
.i_visible_width
,
635 else p_buffer_start
= (uint32_t*)p_sys
->p_buffer
;
640 i_scale_count
= ( i_vscale
== 1 ) ?
641 (p_filter
->fmt_out
.video
.i_y_offset
+ p_filter
->fmt_out
.video
.i_visible_height
) :
642 (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
);
646 i_rewind
= (-(p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)) & 15;
649 ** SSE2 128 bits fetch/store instructions are faster
650 ** if memory access is 16 bytes aligned
653 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
655 if( 0 == (15 & (p_src
->p
[Y_PLANE
].i_pitch
|
658 ((intptr_t)p_buffer
))) )
660 /* use faster SSE2 aligned fetch and store */
661 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
665 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 16; i_x
--; )
671 SSE2_UNPACK_32_ARGB_ALIGNED
679 /* Here we do some unaligned reads and duplicate conversions, but
680 * at least we have all the pixels */
684 p_u
-= i_rewind
>> 1;
685 p_v
-= i_rewind
>> 1;
686 p_buffer
-= i_rewind
;
688 SSE2_INIT_32_UNALIGNED
691 SSE2_UNPACK_32_ARGB_UNALIGNED
698 SCALE_HEIGHT( 420, 4 );
700 p_y
+= i_source_margin
;
703 p_u
+= i_source_margin_c
;
704 p_v
+= i_source_margin_c
;
706 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
711 /* use slower SSE2 unaligned fetch and store */
712 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
716 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 16; i_x
--; )
719 SSE2_INIT_32_UNALIGNED
722 SSE2_UNPACK_32_ARGB_UNALIGNED
730 /* Here we do some unaligned reads and duplicate conversions, but
731 * at least we have all the pixels */
735 p_u
-= i_rewind
>> 1;
736 p_v
-= i_rewind
>> 1;
737 p_buffer
-= i_rewind
;
739 SSE2_INIT_32_UNALIGNED
742 SSE2_UNPACK_32_ARGB_UNALIGNED
749 SCALE_HEIGHT( 420, 4 );
751 p_y
+= i_source_margin
;
754 p_u
+= i_source_margin_c
;
755 p_v
+= i_source_margin_c
;
757 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
761 /* make sure all SSE2 stores are visible thereafter */
766 i_rewind
= (-(p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)) & 7;
768 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
771 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
773 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 8; i_x
--; )
787 /* Here we do some unaligned reads and duplicate conversions, but
788 * at least we have all the pixels */
792 p_u
-= i_rewind
>> 1;
793 p_v
-= i_rewind
>> 1;
794 p_buffer
-= i_rewind
;
806 SCALE_HEIGHT( 420, 4 );
808 p_y
+= i_source_margin
;
811 p_u
+= i_source_margin_c
;
812 p_v
+= i_source_margin_c
;
816 /* re-enable FPU registers */
823 void I420_R8G8B8A8( filter_t
*p_filter
, picture_t
*p_src
, picture_t
*p_dest
)
825 filter_sys_t
*p_sys
= p_filter
->p_sys
;
827 /* We got this one from the old arguments */
828 uint32_t *p_pic
= (uint32_t*)p_dest
->p
->p_pixels
;
829 uint8_t *p_y
= p_src
->Y_PIXELS
;
830 uint8_t *p_u
= p_src
->U_PIXELS
;
831 uint8_t *p_v
= p_src
->V_PIXELS
;
833 bool b_hscale
; /* horizontal scaling type */
834 unsigned int i_vscale
; /* vertical scaling type */
835 unsigned int i_x
, i_y
; /* horizontal and vertical indexes */
839 int i_scale_count
; /* scale modulo counter */
840 int i_chroma_width
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 2; /* chroma width */
841 uint32_t * p_pic_start
; /* beginning of the current line for copy */
842 /* Conversion buffer pointer */
843 uint32_t * p_buffer_start
;
846 /* Offset array pointer */
847 int * p_offset_start
= p_sys
->p_offset
;
850 const int i_source_margin
= p_src
->p
[0].i_pitch
851 - p_src
->p
[0].i_visible_pitch
852 - p_filter
->fmt_in
.video
.i_x_offset
;
853 const int i_source_margin_c
= p_src
->p
[1].i_pitch
854 - p_src
->p
[1].i_visible_pitch
855 - ( p_filter
->fmt_in
.video
.i_x_offset
/ 2 );
857 i_right_margin
= p_dest
->p
->i_pitch
- p_dest
->p
->i_visible_pitch
;
859 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
860 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
861 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
862 SetOffset( (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
),
863 (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
),
864 (p_filter
->fmt_out
.video
.i_x_offset
+ p_filter
->fmt_out
.video
.i_visible_width
),
865 (p_filter
->fmt_out
.video
.i_y_offset
+ p_filter
->fmt_out
.video
.i_visible_height
),
866 &b_hscale
, &i_vscale
, p_offset_start
);
869 AllocateOrGrow(&p_sys
->p_buffer
, &p_sys
->i_buffer_size
,
870 p_filter
->fmt_in
.video
.i_x_offset
+
871 p_filter
->fmt_in
.video
.i_visible_width
,
874 else p_buffer_start
= (uint32_t*)p_sys
->p_buffer
;
879 i_scale_count
= ( i_vscale
== 1 ) ?
880 (p_filter
->fmt_out
.video
.i_y_offset
+ p_filter
->fmt_out
.video
.i_visible_height
) :
881 (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
);
885 i_rewind
= (-(p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)) & 15;
888 ** SSE2 128 bits fetch/store instructions are faster
889 ** if memory access is 16 bytes aligned
892 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
894 if( 0 == (15 & (p_src
->p
[Y_PLANE
].i_pitch
|
897 ((intptr_t)p_buffer
))) )
899 /* use faster SSE2 aligned fetch and store */
900 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
904 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 16; i_x
--; )
910 SSE2_UNPACK_32_RGBA_ALIGNED
918 /* Here we do some unaligned reads and duplicate conversions, but
919 * at least we have all the pixels */
923 p_u
-= i_rewind
>> 1;
924 p_v
-= i_rewind
>> 1;
925 p_buffer
-= i_rewind
;
927 SSE2_INIT_32_UNALIGNED
930 SSE2_UNPACK_32_RGBA_UNALIGNED
937 SCALE_HEIGHT( 420, 4 );
939 p_y
+= i_source_margin
;
942 p_u
+= i_source_margin_c
;
943 p_v
+= i_source_margin_c
;
945 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
950 /* use slower SSE2 unaligned fetch and store */
951 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
955 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 16; i_x
--; )
958 SSE2_INIT_32_UNALIGNED
961 SSE2_UNPACK_32_RGBA_UNALIGNED
969 /* Here we do some unaligned reads and duplicate conversions, but
970 * at least we have all the pixels */
974 p_u
-= i_rewind
>> 1;
975 p_v
-= i_rewind
>> 1;
976 p_buffer
-= i_rewind
;
978 SSE2_INIT_32_UNALIGNED
981 SSE2_UNPACK_32_RGBA_UNALIGNED
988 SCALE_HEIGHT( 420, 4 );
990 p_y
+= i_source_margin
;
993 p_u
+= i_source_margin_c
;
994 p_v
+= i_source_margin_c
;
996 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1000 /* make sure all SSE2 stores are visible thereafter */
1005 i_rewind
= (-(p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)) & 7;
1007 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
1009 p_pic_start
= p_pic
;
1010 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1012 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 8; i_x
--; )
1026 /* Here we do some unaligned reads and duplicate conversions, but
1027 * at least we have all the pixels */
1031 p_u
-= i_rewind
>> 1;
1032 p_v
-= i_rewind
>> 1;
1033 p_buffer
-= i_rewind
;
1045 SCALE_HEIGHT( 420, 4 );
1047 p_y
+= i_source_margin
;
1050 p_u
+= i_source_margin_c
;
1051 p_v
+= i_source_margin_c
;
1055 /* re-enable FPU registers */
1062 void I420_B8G8R8A8( filter_t
*p_filter
, picture_t
*p_src
, picture_t
*p_dest
)
1064 filter_sys_t
*p_sys
= p_filter
->p_sys
;
1066 /* We got this one from the old arguments */
1067 uint32_t *p_pic
= (uint32_t*)p_dest
->p
->p_pixels
;
1068 uint8_t *p_y
= p_src
->Y_PIXELS
;
1069 uint8_t *p_u
= p_src
->U_PIXELS
;
1070 uint8_t *p_v
= p_src
->V_PIXELS
;
1072 bool b_hscale
; /* horizontal scaling type */
1073 unsigned int i_vscale
; /* vertical scaling type */
1074 unsigned int i_x
, i_y
; /* horizontal and vertical indexes */
1078 int i_scale_count
; /* scale modulo counter */
1079 int i_chroma_width
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 2; /* chroma width */
1080 uint32_t * p_pic_start
; /* beginning of the current line for copy */
1081 /* Conversion buffer pointer */
1082 uint32_t * p_buffer_start
;
1083 uint32_t * p_buffer
;
1085 /* Offset array pointer */
1086 int * p_offset_start
= p_sys
->p_offset
;
1089 const int i_source_margin
= p_src
->p
[0].i_pitch
1090 - p_src
->p
[0].i_visible_pitch
1091 - p_filter
->fmt_in
.video
.i_x_offset
;
1092 const int i_source_margin_c
= p_src
->p
[1].i_pitch
1093 - p_src
->p
[1].i_visible_pitch
1094 - ( p_filter
->fmt_in
.video
.i_x_offset
/ 2 );
1096 i_right_margin
= p_dest
->p
->i_pitch
- p_dest
->p
->i_visible_pitch
;
1098 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
1099 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
1100 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
1101 SetOffset( (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
),
1102 (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
),
1103 (p_filter
->fmt_out
.video
.i_x_offset
+ p_filter
->fmt_out
.video
.i_visible_width
),
1104 (p_filter
->fmt_out
.video
.i_y_offset
+ p_filter
->fmt_out
.video
.i_visible_height
),
1105 &b_hscale
, &i_vscale
, p_offset_start
);
1108 AllocateOrGrow(&p_sys
->p_buffer
, &p_sys
->i_buffer_size
,
1109 p_filter
->fmt_in
.video
.i_x_offset
+
1110 p_filter
->fmt_in
.video
.i_visible_width
,
1113 else p_buffer_start
= (uint32_t*)p_sys
->p_buffer
;
1116 * Perform conversion
1118 i_scale_count
= ( i_vscale
== 1 ) ?
1119 (p_filter
->fmt_out
.video
.i_y_offset
+ p_filter
->fmt_out
.video
.i_visible_height
) :
1120 (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
);
1124 i_rewind
= (-(p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)) & 15;
1127 ** SSE2 128 bits fetch/store instructions are faster
1128 ** if memory access is 16 bytes aligned
1131 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1133 if( 0 == (15 & (p_src
->p
[Y_PLANE
].i_pitch
|
1136 ((intptr_t)p_buffer
))) )
1138 /* use faster SSE2 aligned fetch and store */
1139 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
1141 p_pic_start
= p_pic
;
1143 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 16; i_x
--; )
1146 SSE2_INIT_32_ALIGNED
1149 SSE2_UNPACK_32_BGRA_ALIGNED
1157 /* Here we do some unaligned reads and duplicate conversions, but
1158 * at least we have all the pixels */
1162 p_u
-= i_rewind
>> 1;
1163 p_v
-= i_rewind
>> 1;
1164 p_buffer
-= i_rewind
;
1166 SSE2_INIT_32_UNALIGNED
1169 SSE2_UNPACK_32_BGRA_UNALIGNED
1176 SCALE_HEIGHT( 420, 4 );
1178 p_y
+= i_source_margin
;
1181 p_u
+= i_source_margin_c
;
1182 p_v
+= i_source_margin_c
;
1184 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1189 /* use slower SSE2 unaligned fetch and store */
1190 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
1192 p_pic_start
= p_pic
;
1194 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 16; i_x
--; )
1197 SSE2_INIT_32_UNALIGNED
1200 SSE2_UNPACK_32_BGRA_UNALIGNED
1208 /* Here we do some unaligned reads and duplicate conversions, but
1209 * at least we have all the pixels */
1213 p_u
-= i_rewind
>> 1;
1214 p_v
-= i_rewind
>> 1;
1215 p_buffer
-= i_rewind
;
1217 SSE2_INIT_32_UNALIGNED
1220 SSE2_UNPACK_32_BGRA_UNALIGNED
1227 SCALE_HEIGHT( 420, 4 );
1229 p_y
+= i_source_margin
;
1232 p_u
+= i_source_margin_c
;
1233 p_v
+= i_source_margin_c
;
1235 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1239 /* make sure all SSE2 stores are visible thereafter */
1244 i_rewind
= (-(p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)) & 7;
1246 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
1248 p_pic_start
= p_pic
;
1249 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1251 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 8; i_x
--; )
1265 /* Here we do some unaligned reads and duplicate conversions, but
1266 * at least we have all the pixels */
1270 p_u
-= i_rewind
>> 1;
1271 p_v
-= i_rewind
>> 1;
1272 p_buffer
-= i_rewind
;
1284 SCALE_HEIGHT( 420, 4 );
1286 p_y
+= i_source_margin
;
1289 p_u
+= i_source_margin_c
;
1290 p_v
+= i_source_margin_c
;
1294 /* re-enable FPU registers */
1301 void I420_A8B8G8R8( filter_t
*p_filter
, picture_t
*p_src
, picture_t
*p_dest
)
1303 filter_sys_t
*p_sys
= p_filter
->p_sys
;
1305 /* We got this one from the old arguments */
1306 uint32_t *p_pic
= (uint32_t*)p_dest
->p
->p_pixels
;
1307 uint8_t *p_y
= p_src
->Y_PIXELS
;
1308 uint8_t *p_u
= p_src
->U_PIXELS
;
1309 uint8_t *p_v
= p_src
->V_PIXELS
;
1311 bool b_hscale
; /* horizontal scaling type */
1312 unsigned int i_vscale
; /* vertical scaling type */
1313 unsigned int i_x
, i_y
; /* horizontal and vertical indexes */
1317 int i_scale_count
; /* scale modulo counter */
1318 int i_chroma_width
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 2; /* chroma width */
1319 uint32_t * p_pic_start
; /* beginning of the current line for copy */
1320 /* Conversion buffer pointer */
1321 uint32_t * p_buffer_start
;
1322 uint32_t * p_buffer
;
1324 /* Offset array pointer */
1325 int * p_offset_start
= p_sys
->p_offset
;
1328 const int i_source_margin
= p_src
->p
[0].i_pitch
1329 - p_src
->p
[0].i_visible_pitch
1330 - p_filter
->fmt_in
.video
.i_x_offset
;
1331 const int i_source_margin_c
= p_src
->p
[1].i_pitch
1332 - p_src
->p
[1].i_visible_pitch
1333 - ( p_filter
->fmt_in
.video
.i_x_offset
/ 2 );
1335 i_right_margin
= p_dest
->p
->i_pitch
- p_dest
->p
->i_visible_pitch
;
1337 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
1338 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
1339 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
1340 SetOffset( (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
),
1341 (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
),
1342 (p_filter
->fmt_out
.video
.i_x_offset
+ p_filter
->fmt_out
.video
.i_visible_width
),
1343 (p_filter
->fmt_out
.video
.i_y_offset
+ p_filter
->fmt_out
.video
.i_visible_height
),
1344 &b_hscale
, &i_vscale
, p_offset_start
);
1347 AllocateOrGrow(&p_sys
->p_buffer
, &p_sys
->i_buffer_size
,
1348 p_filter
->fmt_in
.video
.i_x_offset
+
1349 p_filter
->fmt_in
.video
.i_visible_width
,
1352 else p_buffer_start
= (uint32_t*)p_sys
->p_buffer
;
1355 * Perform conversion
1357 i_scale_count
= ( i_vscale
== 1 ) ?
1358 (p_filter
->fmt_out
.video
.i_y_offset
+ p_filter
->fmt_out
.video
.i_visible_height
) :
1359 (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
);
1363 i_rewind
= (-(p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)) & 15;
1366 ** SSE2 128 bits fetch/store instructions are faster
1367 ** if memory access is 16 bytes aligned
1370 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1372 if( 0 == (15 & (p_src
->p
[Y_PLANE
].i_pitch
|
1375 ((intptr_t)p_buffer
))) )
1377 /* use faster SSE2 aligned fetch and store */
1378 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
1380 p_pic_start
= p_pic
;
1382 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 16; i_x
--; )
1385 SSE2_INIT_32_ALIGNED
1388 SSE2_UNPACK_32_ABGR_ALIGNED
1396 /* Here we do some unaligned reads and duplicate conversions, but
1397 * at least we have all the pixels */
1401 p_u
-= i_rewind
>> 1;
1402 p_v
-= i_rewind
>> 1;
1403 p_buffer
-= i_rewind
;
1405 SSE2_INIT_32_UNALIGNED
1408 SSE2_UNPACK_32_ABGR_UNALIGNED
1415 SCALE_HEIGHT( 420, 4 );
1417 p_y
+= i_source_margin
;
1420 p_u
+= i_source_margin_c
;
1421 p_v
+= i_source_margin_c
;
1423 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1428 /* use slower SSE2 unaligned fetch and store */
1429 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
1431 p_pic_start
= p_pic
;
1433 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 16; i_x
--; )
1436 SSE2_INIT_32_UNALIGNED
1439 SSE2_UNPACK_32_ABGR_UNALIGNED
1447 /* Here we do some unaligned reads and duplicate conversions, but
1448 * at least we have all the pixels */
1452 p_u
-= i_rewind
>> 1;
1453 p_v
-= i_rewind
>> 1;
1454 p_buffer
-= i_rewind
;
1456 SSE2_INIT_32_UNALIGNED
1459 SSE2_UNPACK_32_ABGR_UNALIGNED
1466 SCALE_HEIGHT( 420, 4 );
1468 p_y
+= i_source_margin
;
1471 p_u
+= i_source_margin_c
;
1472 p_v
+= i_source_margin_c
;
1474 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1478 /* make sure all SSE2 stores are visible thereafter */
1483 i_rewind
= (-(p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)) & 7;
1485 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
1487 p_pic_start
= p_pic
;
1488 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1490 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 8; i_x
--; )
1504 /* Here we do some unaligned reads and duplicate conversions, but
1505 * at least we have all the pixels */
1509 p_u
-= i_rewind
>> 1;
1510 p_v
-= i_rewind
>> 1;
1511 p_buffer
-= i_rewind
;
1523 SCALE_HEIGHT( 420, 4 );
1525 p_y
+= i_source_margin
;
1528 p_u
+= i_source_margin_c
;
1529 p_v
+= i_source_margin_c
;
1533 /* re-enable FPU registers */