1 /*****************************************************************************
2 * i420_rgb16_x86.c : YUV to bitmap RGB conversion module for vlc
3 *****************************************************************************
4 * Copyright (C) 2000 VLC authors and VideoLAN
7 * Authors: Samuel Hocevar <sam@zoy.org>
8 * Damien Fouilleul <damienf@videolan.org>
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU Lesser General Public License as published by
12 * the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this program; if not, write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
23 *****************************************************************************/
29 #include <vlc_common.h>
30 #include <vlc_filter.h>
31 #include <vlc_picture.h>
36 # include "i420_rgb_sse2.h"
37 # define VLC_TARGET VLC_SSE
39 # include "i420_rgb_mmx.h"
40 # define VLC_TARGET VLC_MMX
43 /*****************************************************************************
44 * SetOffset: build offset array for conversion functions
45 *****************************************************************************
46 * This function will build an offset array used in later conversion functions.
47 * It will also set horizontal and vertical scaling indicators.
48 *****************************************************************************/
49 static void SetOffset( int i_width
, int i_height
, int i_pic_width
,
50 int i_pic_height
, bool *pb_hscale
,
51 unsigned int *pi_vscale
, int *p_offset
)
54 * Prepare horizontal offset array
56 if( i_pic_width
- i_width
== 0 )
57 { /* No horizontal scaling: YUV conversion is done directly to picture */
60 else if( i_pic_width
- i_width
> 0 )
61 { /* Prepare scaling array for horizontal extension */
62 int i_scale_count
= i_pic_width
;
65 for( int i_x
= i_width
; i_x
--; )
67 while( (i_scale_count
-= i_width
) > 0 )
72 i_scale_count
+= i_pic_width
;
75 else /* if( i_pic_width - i_width < 0 ) */
76 { /* Prepare scaling array for horizontal reduction */
77 int i_scale_count
= i_pic_width
;
80 for( int i_x
= i_pic_width
; i_x
--; )
83 while( (i_scale_count
-= i_pic_width
) > 0 )
88 i_scale_count
+= i_width
;
93 * Set vertical scaling indicator
95 if( i_pic_height
- i_height
== 0 )
97 else if( i_pic_height
- i_height
> 0 )
99 else /* if( i_pic_height - i_height < 0 ) */
104 void I420_R5G5B5( filter_t
*p_filter
, picture_t
*p_src
, picture_t
*p_dest
)
106 /* We got this one from the old arguments */
107 uint16_t *p_pic
= (uint16_t*)p_dest
->p
->p_pixels
;
108 uint8_t *p_y
= p_src
->Y_PIXELS
;
109 uint8_t *p_u
= p_src
->U_PIXELS
;
110 uint8_t *p_v
= p_src
->V_PIXELS
;
112 bool b_hscale
; /* horizontal scaling type */
113 unsigned int i_vscale
; /* vertical scaling type */
114 unsigned int i_x
, i_y
; /* horizontal and vertical indexes */
118 int i_scale_count
; /* scale modulo counter */
119 int i_chroma_width
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 2; /* chroma width */
120 uint16_t * p_pic_start
; /* beginning of the current line for copy */
122 /* Conversion buffer pointer */
123 uint16_t * p_buffer_start
= (uint16_t*)p_filter
->p_sys
->p_buffer
;
126 /* Offset array pointer */
127 int * p_offset_start
= p_filter
->p_sys
->p_offset
;
130 const int i_source_margin
= p_src
->p
[0].i_pitch
131 - p_src
->p
[0].i_visible_pitch
132 - p_filter
->fmt_in
.video
.i_x_offset
;
133 const int i_source_margin_c
= p_src
->p
[1].i_pitch
134 - p_src
->p
[1].i_visible_pitch
135 - ( p_filter
->fmt_in
.video
.i_x_offset
/ 2 );
137 i_right_margin
= p_dest
->p
->i_pitch
- p_dest
->p
->i_visible_pitch
;
139 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
140 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
141 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
142 SetOffset( (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
),
143 (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
),
144 (p_filter
->fmt_out
.video
.i_x_offset
+ p_filter
->fmt_out
.video
.i_visible_width
),
145 (p_filter
->fmt_out
.video
.i_y_offset
+ p_filter
->fmt_out
.video
.i_visible_height
),
146 &b_hscale
, &i_vscale
, p_offset_start
);
152 i_scale_count
= ( i_vscale
== 1 ) ?
153 (p_filter
->fmt_out
.video
.i_y_offset
+ p_filter
->fmt_out
.video
.i_visible_height
) :
154 (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
);
158 i_rewind
= (-(p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)) & 15;
161 ** SSE2 128 bits fetch/store instructions are faster
162 ** if memory access is 16 bytes aligned
165 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
166 if( 0 == (15 & (p_src
->p
[Y_PLANE
].i_pitch
|
169 ((intptr_t)p_buffer
))) )
171 /* use faster SSE2 aligned fetch and store */
172 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
176 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)/16; i_x
--; )
182 SSE2_UNPACK_15_ALIGNED
189 /* Here we do some unaligned reads and duplicate conversions, but
190 * at least we have all the pixels */
194 p_u
-= i_rewind
>> 1;
195 p_v
-= i_rewind
>> 1;
196 p_buffer
-= i_rewind
;
199 SSE2_INIT_16_UNALIGNED
202 SSE2_UNPACK_15_UNALIGNED
209 SCALE_HEIGHT( 420, 2 );
211 p_y
+= i_source_margin
;
214 p_u
+= i_source_margin_c
;
215 p_v
+= i_source_margin_c
;
217 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
222 /* use slower SSE2 unaligned fetch and store */
223 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
226 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
228 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)/16; i_x
--; )
231 SSE2_INIT_16_UNALIGNED
234 SSE2_UNPACK_15_UNALIGNED
241 /* Here we do some unaligned reads and duplicate conversions, but
242 * at least we have all the pixels */
246 p_u
-= i_rewind
>> 1;
247 p_v
-= i_rewind
>> 1;
248 p_buffer
-= i_rewind
;
251 SSE2_INIT_16_UNALIGNED
254 SSE2_UNPACK_15_UNALIGNED
261 SCALE_HEIGHT( 420, 2 );
263 p_y
+= i_source_margin
;
266 p_u
+= i_source_margin_c
;
267 p_v
+= i_source_margin_c
;
269 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
273 /* make sure all SSE2 stores are visible thereafter */
278 i_rewind
= (-(p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)) & 7;
280 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
283 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
285 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 8; i_x
--; )
299 /* Here we do some unaligned reads and duplicate conversions, but
300 * at least we have all the pixels */
304 p_u
-= i_rewind
>> 1;
305 p_v
-= i_rewind
>> 1;
306 p_buffer
-= i_rewind
;
320 SCALE_HEIGHT( 420, 2 );
322 p_y
+= i_source_margin
;
325 p_u
+= i_source_margin_c
;
326 p_v
+= i_source_margin_c
;
329 /* re-enable FPU registers */
336 void I420_R5G6B5( filter_t
*p_filter
, picture_t
*p_src
, picture_t
*p_dest
)
338 /* We got this one from the old arguments */
339 uint16_t *p_pic
= (uint16_t*)p_dest
->p
->p_pixels
;
340 uint8_t *p_y
= p_src
->Y_PIXELS
;
341 uint8_t *p_u
= p_src
->U_PIXELS
;
342 uint8_t *p_v
= p_src
->V_PIXELS
;
344 bool b_hscale
; /* horizontal scaling type */
345 unsigned int i_vscale
; /* vertical scaling type */
346 unsigned int i_x
, i_y
; /* horizontal and vertical indexes */
350 int i_scale_count
; /* scale modulo counter */
351 int i_chroma_width
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 2; /* chroma width */
352 uint16_t * p_pic_start
; /* beginning of the current line for copy */
354 /* Conversion buffer pointer */
355 uint16_t * p_buffer_start
= (uint16_t*)p_filter
->p_sys
->p_buffer
;
358 /* Offset array pointer */
359 int * p_offset_start
= p_filter
->p_sys
->p_offset
;
362 const int i_source_margin
= p_src
->p
[0].i_pitch
363 - p_src
->p
[0].i_visible_pitch
364 - p_filter
->fmt_in
.video
.i_x_offset
;
365 const int i_source_margin_c
= p_src
->p
[1].i_pitch
366 - p_src
->p
[1].i_visible_pitch
367 - ( p_filter
->fmt_in
.video
.i_x_offset
/ 2 );
369 i_right_margin
= p_dest
->p
->i_pitch
- p_dest
->p
->i_visible_pitch
;
371 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
372 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
373 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
374 SetOffset( (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
),
375 (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
),
376 (p_filter
->fmt_out
.video
.i_x_offset
+ p_filter
->fmt_out
.video
.i_visible_width
),
377 (p_filter
->fmt_out
.video
.i_y_offset
+ p_filter
->fmt_out
.video
.i_visible_height
),
378 &b_hscale
, &i_vscale
, p_offset_start
);
384 i_scale_count
= ( i_vscale
== 1 ) ?
385 (p_filter
->fmt_out
.video
.i_y_offset
+ p_filter
->fmt_out
.video
.i_visible_height
) :
386 (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
);
390 i_rewind
= (-(p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)) & 15;
393 ** SSE2 128 bits fetch/store instructions are faster
394 ** if memory access is 16 bytes aligned
397 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
398 if( 0 == (15 & (p_src
->p
[Y_PLANE
].i_pitch
|
401 ((intptr_t)p_buffer
))) )
403 /* use faster SSE2 aligned fetch and store */
404 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
408 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)/16; i_x
--; )
414 SSE2_UNPACK_16_ALIGNED
421 /* Here we do some unaligned reads and duplicate conversions, but
422 * at least we have all the pixels */
426 p_u
-= i_rewind
>> 1;
427 p_v
-= i_rewind
>> 1;
428 p_buffer
-= i_rewind
;
431 SSE2_INIT_16_UNALIGNED
434 SSE2_UNPACK_16_UNALIGNED
441 SCALE_HEIGHT( 420, 2 );
443 p_y
+= i_source_margin
;
446 p_u
+= i_source_margin_c
;
447 p_v
+= i_source_margin_c
;
449 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
454 /* use slower SSE2 unaligned fetch and store */
455 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
458 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
460 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)/16; i_x
--; )
463 SSE2_INIT_16_UNALIGNED
466 SSE2_UNPACK_16_UNALIGNED
473 /* Here we do some unaligned reads and duplicate conversions, but
474 * at least we have all the pixels */
478 p_u
-= i_rewind
>> 1;
479 p_v
-= i_rewind
>> 1;
480 p_buffer
-= i_rewind
;
483 SSE2_INIT_16_UNALIGNED
486 SSE2_UNPACK_16_UNALIGNED
493 SCALE_HEIGHT( 420, 2 );
495 p_y
+= i_source_margin
;
498 p_u
+= i_source_margin_c
;
499 p_v
+= i_source_margin_c
;
501 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
505 /* make sure all SSE2 stores are visible thereafter */
510 i_rewind
= (-(p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)) & 7;
512 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
515 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
517 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 8; i_x
--; )
531 /* Here we do some unaligned reads and duplicate conversions, but
532 * at least we have all the pixels */
536 p_u
-= i_rewind
>> 1;
537 p_v
-= i_rewind
>> 1;
538 p_buffer
-= i_rewind
;
552 SCALE_HEIGHT( 420, 2 );
554 p_y
+= i_source_margin
;
557 p_u
+= i_source_margin_c
;
558 p_v
+= i_source_margin_c
;
561 /* re-enable FPU registers */
568 void I420_A8R8G8B8( filter_t
*p_filter
, picture_t
*p_src
,
571 /* We got this one from the old arguments */
572 uint32_t *p_pic
= (uint32_t*)p_dest
->p
->p_pixels
;
573 uint8_t *p_y
= p_src
->Y_PIXELS
;
574 uint8_t *p_u
= p_src
->U_PIXELS
;
575 uint8_t *p_v
= p_src
->V_PIXELS
;
577 bool b_hscale
; /* horizontal scaling type */
578 unsigned int i_vscale
; /* vertical scaling type */
579 unsigned int i_x
, i_y
; /* horizontal and vertical indexes */
583 int i_scale_count
; /* scale modulo counter */
584 int i_chroma_width
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 2; /* chroma width */
585 uint32_t * p_pic_start
; /* beginning of the current line for copy */
586 /* Conversion buffer pointer */
587 uint32_t * p_buffer_start
= (uint32_t*)p_filter
->p_sys
->p_buffer
;
590 /* Offset array pointer */
591 int * p_offset_start
= p_filter
->p_sys
->p_offset
;
594 const int i_source_margin
= p_src
->p
[0].i_pitch
595 - p_src
->p
[0].i_visible_pitch
596 - p_filter
->fmt_in
.video
.i_x_offset
;
597 const int i_source_margin_c
= p_src
->p
[1].i_pitch
598 - p_src
->p
[1].i_visible_pitch
599 - ( p_filter
->fmt_in
.video
.i_x_offset
/ 2 );
601 i_right_margin
= p_dest
->p
->i_pitch
- p_dest
->p
->i_visible_pitch
;
603 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
604 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
605 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
606 SetOffset( p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
,
607 p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
,
608 (p_filter
->fmt_out
.video
.i_x_offset
+ p_filter
->fmt_out
.video
.i_visible_width
),
609 (p_filter
->fmt_out
.video
.i_y_offset
+ p_filter
->fmt_out
.video
.i_visible_height
),
610 &b_hscale
, &i_vscale
, p_offset_start
);
615 i_scale_count
= ( i_vscale
== 1 ) ?
616 (p_filter
->fmt_out
.video
.i_y_offset
+ p_filter
->fmt_out
.video
.i_visible_height
) :
617 (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
);
621 i_rewind
= (-(p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)) & 15;
624 ** SSE2 128 bits fetch/store instructions are faster
625 ** if memory access is 16 bytes aligned
628 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
629 if( 0 == (15 & (p_src
->p
[Y_PLANE
].i_pitch
|
632 ((intptr_t)p_buffer
))) )
634 /* use faster SSE2 aligned fetch and store */
635 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
639 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 16; i_x
--; )
645 SSE2_UNPACK_32_ARGB_ALIGNED
653 /* Here we do some unaligned reads and duplicate conversions, but
654 * at least we have all the pixels */
658 p_u
-= i_rewind
>> 1;
659 p_v
-= i_rewind
>> 1;
660 p_buffer
-= i_rewind
;
662 SSE2_INIT_32_UNALIGNED
665 SSE2_UNPACK_32_ARGB_UNALIGNED
672 SCALE_HEIGHT( 420, 4 );
674 p_y
+= i_source_margin
;
677 p_u
+= i_source_margin_c
;
678 p_v
+= i_source_margin_c
;
680 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
685 /* use slower SSE2 unaligned fetch and store */
686 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
689 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
691 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 16; i_x
--; )
694 SSE2_INIT_32_UNALIGNED
697 SSE2_UNPACK_32_ARGB_UNALIGNED
705 /* Here we do some unaligned reads and duplicate conversions, but
706 * at least we have all the pixels */
710 p_u
-= i_rewind
>> 1;
711 p_v
-= i_rewind
>> 1;
712 p_buffer
-= i_rewind
;
714 SSE2_INIT_32_UNALIGNED
717 SSE2_UNPACK_32_ARGB_UNALIGNED
724 SCALE_HEIGHT( 420, 4 );
726 p_y
+= i_source_margin
;
729 p_u
+= i_source_margin_c
;
730 p_v
+= i_source_margin_c
;
732 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
736 /* make sure all SSE2 stores are visible thereafter */
741 i_rewind
= (-(p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)) & 7;
743 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
746 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
748 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 8; i_x
--; )
762 /* Here we do some unaligned reads and duplicate conversions, but
763 * at least we have all the pixels */
767 p_u
-= i_rewind
>> 1;
768 p_v
-= i_rewind
>> 1;
769 p_buffer
-= i_rewind
;
782 SCALE_HEIGHT( 420, 4 );
784 p_y
+= i_source_margin
;
787 p_u
+= i_source_margin_c
;
788 p_v
+= i_source_margin_c
;
792 /* re-enable FPU registers */
799 void I420_R8G8B8A8( filter_t
*p_filter
, picture_t
*p_src
, picture_t
*p_dest
)
801 /* We got this one from the old arguments */
802 uint32_t *p_pic
= (uint32_t*)p_dest
->p
->p_pixels
;
803 uint8_t *p_y
= p_src
->Y_PIXELS
;
804 uint8_t *p_u
= p_src
->U_PIXELS
;
805 uint8_t *p_v
= p_src
->V_PIXELS
;
807 bool b_hscale
; /* horizontal scaling type */
808 unsigned int i_vscale
; /* vertical scaling type */
809 unsigned int i_x
, i_y
; /* horizontal and vertical indexes */
813 int i_scale_count
; /* scale modulo counter */
814 int i_chroma_width
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 2; /* chroma width */
815 uint32_t * p_pic_start
; /* beginning of the current line for copy */
816 /* Conversion buffer pointer */
817 uint32_t * p_buffer_start
= (uint32_t*)p_filter
->p_sys
->p_buffer
;
820 /* Offset array pointer */
821 int * p_offset_start
= p_filter
->p_sys
->p_offset
;
824 const int i_source_margin
= p_src
->p
[0].i_pitch
825 - p_src
->p
[0].i_visible_pitch
826 - p_filter
->fmt_in
.video
.i_x_offset
;
827 const int i_source_margin_c
= p_src
->p
[1].i_pitch
828 - p_src
->p
[1].i_visible_pitch
829 - ( p_filter
->fmt_in
.video
.i_x_offset
/ 2 );
831 i_right_margin
= p_dest
->p
->i_pitch
- p_dest
->p
->i_visible_pitch
;
833 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
834 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
835 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
836 SetOffset( (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
),
837 (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
),
838 (p_filter
->fmt_out
.video
.i_x_offset
+ p_filter
->fmt_out
.video
.i_visible_width
),
839 (p_filter
->fmt_out
.video
.i_y_offset
+ p_filter
->fmt_out
.video
.i_visible_height
),
840 &b_hscale
, &i_vscale
, p_offset_start
);
845 i_scale_count
= ( i_vscale
== 1 ) ?
846 (p_filter
->fmt_out
.video
.i_y_offset
+ p_filter
->fmt_out
.video
.i_visible_height
) :
847 (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
);
851 i_rewind
= (-(p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)) & 15;
854 ** SSE2 128 bits fetch/store instructions are faster
855 ** if memory access is 16 bytes aligned
858 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
859 if( 0 == (15 & (p_src
->p
[Y_PLANE
].i_pitch
|
862 ((intptr_t)p_buffer
))) )
864 /* use faster SSE2 aligned fetch and store */
865 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
869 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 16; i_x
--; )
875 SSE2_UNPACK_32_RGBA_ALIGNED
883 /* Here we do some unaligned reads and duplicate conversions, but
884 * at least we have all the pixels */
888 p_u
-= i_rewind
>> 1;
889 p_v
-= i_rewind
>> 1;
890 p_buffer
-= i_rewind
;
892 SSE2_INIT_32_UNALIGNED
895 SSE2_UNPACK_32_RGBA_UNALIGNED
902 SCALE_HEIGHT( 420, 4 );
904 p_y
+= i_source_margin
;
907 p_u
+= i_source_margin_c
;
908 p_v
+= i_source_margin_c
;
910 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
915 /* use slower SSE2 unaligned fetch and store */
916 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
919 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
921 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 16; i_x
--; )
924 SSE2_INIT_32_UNALIGNED
927 SSE2_UNPACK_32_RGBA_UNALIGNED
935 /* Here we do some unaligned reads and duplicate conversions, but
936 * at least we have all the pixels */
940 p_u
-= i_rewind
>> 1;
941 p_v
-= i_rewind
>> 1;
942 p_buffer
-= i_rewind
;
944 SSE2_INIT_32_UNALIGNED
947 SSE2_UNPACK_32_RGBA_UNALIGNED
954 SCALE_HEIGHT( 420, 4 );
956 p_y
+= i_source_margin
;
959 p_u
+= i_source_margin_c
;
960 p_v
+= i_source_margin_c
;
962 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
966 /* make sure all SSE2 stores are visible thereafter */
971 i_rewind
= (-(p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)) & 7;
973 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
976 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
978 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 8; i_x
--; )
992 /* Here we do some unaligned reads and duplicate conversions, but
993 * at least we have all the pixels */
997 p_u
-= i_rewind
>> 1;
998 p_v
-= i_rewind
>> 1;
999 p_buffer
-= i_rewind
;
1012 SCALE_HEIGHT( 420, 4 );
1014 p_y
+= i_source_margin
;
1017 p_u
+= i_source_margin_c
;
1018 p_v
+= i_source_margin_c
;
1022 /* re-enable FPU registers */
1029 void I420_B8G8R8A8( filter_t
*p_filter
, picture_t
*p_src
, picture_t
*p_dest
)
1031 /* We got this one from the old arguments */
1032 uint32_t *p_pic
= (uint32_t*)p_dest
->p
->p_pixels
;
1033 uint8_t *p_y
= p_src
->Y_PIXELS
;
1034 uint8_t *p_u
= p_src
->U_PIXELS
;
1035 uint8_t *p_v
= p_src
->V_PIXELS
;
1037 bool b_hscale
; /* horizontal scaling type */
1038 unsigned int i_vscale
; /* vertical scaling type */
1039 unsigned int i_x
, i_y
; /* horizontal and vertical indexes */
1043 int i_scale_count
; /* scale modulo counter */
1044 int i_chroma_width
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 2; /* chroma width */
1045 uint32_t * p_pic_start
; /* beginning of the current line for copy */
1046 /* Conversion buffer pointer */
1047 uint32_t * p_buffer_start
= (uint32_t*)p_filter
->p_sys
->p_buffer
;
1048 uint32_t * p_buffer
;
1050 /* Offset array pointer */
1051 int * p_offset_start
= p_filter
->p_sys
->p_offset
;
1054 const int i_source_margin
= p_src
->p
[0].i_pitch
1055 - p_src
->p
[0].i_visible_pitch
1056 - p_filter
->fmt_in
.video
.i_x_offset
;
1057 const int i_source_margin_c
= p_src
->p
[1].i_pitch
1058 - p_src
->p
[1].i_visible_pitch
1059 - ( p_filter
->fmt_in
.video
.i_x_offset
/ 2 );
1061 i_right_margin
= p_dest
->p
->i_pitch
- p_dest
->p
->i_visible_pitch
;
1063 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
1064 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
1065 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
1066 SetOffset( (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
),
1067 (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
),
1068 (p_filter
->fmt_out
.video
.i_x_offset
+ p_filter
->fmt_out
.video
.i_visible_width
),
1069 (p_filter
->fmt_out
.video
.i_y_offset
+ p_filter
->fmt_out
.video
.i_visible_height
),
1070 &b_hscale
, &i_vscale
, p_offset_start
);
1073 * Perform conversion
1075 i_scale_count
= ( i_vscale
== 1 ) ?
1076 (p_filter
->fmt_out
.video
.i_y_offset
+ p_filter
->fmt_out
.video
.i_visible_height
) :
1077 (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
);
1081 i_rewind
= (-(p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)) & 15;
1084 ** SSE2 128 bits fetch/store instructions are faster
1085 ** if memory access is 16 bytes aligned
1088 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1089 if( 0 == (15 & (p_src
->p
[Y_PLANE
].i_pitch
|
1092 ((intptr_t)p_buffer
))) )
1094 /* use faster SSE2 aligned fetch and store */
1095 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
1097 p_pic_start
= p_pic
;
1099 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 16; i_x
--; )
1102 SSE2_INIT_32_ALIGNED
1105 SSE2_UNPACK_32_BGRA_ALIGNED
1113 /* Here we do some unaligned reads and duplicate conversions, but
1114 * at least we have all the pixels */
1118 p_u
-= i_rewind
>> 1;
1119 p_v
-= i_rewind
>> 1;
1120 p_buffer
-= i_rewind
;
1122 SSE2_INIT_32_UNALIGNED
1125 SSE2_UNPACK_32_BGRA_UNALIGNED
1132 SCALE_HEIGHT( 420, 4 );
1134 p_y
+= i_source_margin
;
1137 p_u
+= i_source_margin_c
;
1138 p_v
+= i_source_margin_c
;
1140 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1145 /* use slower SSE2 unaligned fetch and store */
1146 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
1148 p_pic_start
= p_pic
;
1149 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1151 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 16; i_x
--; )
1154 SSE2_INIT_32_UNALIGNED
1157 SSE2_UNPACK_32_BGRA_UNALIGNED
1165 /* Here we do some unaligned reads and duplicate conversions, but
1166 * at least we have all the pixels */
1170 p_u
-= i_rewind
>> 1;
1171 p_v
-= i_rewind
>> 1;
1172 p_buffer
-= i_rewind
;
1174 SSE2_INIT_32_UNALIGNED
1177 SSE2_UNPACK_32_BGRA_UNALIGNED
1184 SCALE_HEIGHT( 420, 4 );
1186 p_y
+= i_source_margin
;
1189 p_u
+= i_source_margin_c
;
1190 p_v
+= i_source_margin_c
;
1192 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1198 i_rewind
= (-(p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)) & 7;
1200 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
1202 p_pic_start
= p_pic
;
1203 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1205 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 8; i_x
--; )
1219 /* Here we do some unaligned reads and duplicate conversions, but
1220 * at least we have all the pixels */
1224 p_u
-= i_rewind
>> 1;
1225 p_v
-= i_rewind
>> 1;
1226 p_buffer
-= i_rewind
;
1239 SCALE_HEIGHT( 420, 4 );
1241 p_y
+= i_source_margin
;
1244 p_u
+= i_source_margin_c
;
1245 p_v
+= i_source_margin_c
;
1249 /* re-enable FPU registers */
1256 void I420_A8B8G8R8( filter_t
*p_filter
, picture_t
*p_src
, picture_t
*p_dest
)
1258 /* We got this one from the old arguments */
1259 uint32_t *p_pic
= (uint32_t*)p_dest
->p
->p_pixels
;
1260 uint8_t *p_y
= p_src
->Y_PIXELS
;
1261 uint8_t *p_u
= p_src
->U_PIXELS
;
1262 uint8_t *p_v
= p_src
->V_PIXELS
;
1264 bool b_hscale
; /* horizontal scaling type */
1265 unsigned int i_vscale
; /* vertical scaling type */
1266 unsigned int i_x
, i_y
; /* horizontal and vertical indexes */
1270 int i_scale_count
; /* scale modulo counter */
1271 int i_chroma_width
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 2; /* chroma width */
1272 uint32_t * p_pic_start
; /* beginning of the current line for copy */
1273 /* Conversion buffer pointer */
1274 uint32_t * p_buffer_start
= (uint32_t*)p_filter
->p_sys
->p_buffer
;
1275 uint32_t * p_buffer
;
1277 /* Offset array pointer */
1278 int * p_offset_start
= p_filter
->p_sys
->p_offset
;
1281 const int i_source_margin
= p_src
->p
[0].i_pitch
1282 - p_src
->p
[0].i_visible_pitch
1283 - p_filter
->fmt_in
.video
.i_x_offset
;
1284 const int i_source_margin_c
= p_src
->p
[1].i_pitch
1285 - p_src
->p
[1].i_visible_pitch
1286 - ( p_filter
->fmt_in
.video
.i_x_offset
/ 2 );
1288 i_right_margin
= p_dest
->p
->i_pitch
- p_dest
->p
->i_visible_pitch
;
1290 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
1291 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
1292 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
1293 SetOffset( (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
),
1294 (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
),
1295 (p_filter
->fmt_out
.video
.i_x_offset
+ p_filter
->fmt_out
.video
.i_visible_width
),
1296 (p_filter
->fmt_out
.video
.i_y_offset
+ p_filter
->fmt_out
.video
.i_visible_height
),
1297 &b_hscale
, &i_vscale
, p_offset_start
);
1300 * Perform conversion
1302 i_scale_count
= ( i_vscale
== 1 ) ?
1303 (p_filter
->fmt_out
.video
.i_y_offset
+ p_filter
->fmt_out
.video
.i_visible_height
) :
1304 (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
);
1308 i_rewind
= (-(p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)) & 15;
1311 ** SSE2 128 bits fetch/store instructions are faster
1312 ** if memory access is 16 bytes aligned
1315 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1316 if( 0 == (15 & (p_src
->p
[Y_PLANE
].i_pitch
|
1319 ((intptr_t)p_buffer
))) )
1321 /* use faster SSE2 aligned fetch and store */
1322 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
1324 p_pic_start
= p_pic
;
1326 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 16; i_x
--; )
1329 SSE2_INIT_32_ALIGNED
1332 SSE2_UNPACK_32_ABGR_ALIGNED
1340 /* Here we do some unaligned reads and duplicate conversions, but
1341 * at least we have all the pixels */
1345 p_u
-= i_rewind
>> 1;
1346 p_v
-= i_rewind
>> 1;
1347 p_buffer
-= i_rewind
;
1349 SSE2_INIT_32_UNALIGNED
1352 SSE2_UNPACK_32_ABGR_UNALIGNED
1359 SCALE_HEIGHT( 420, 4 );
1361 p_y
+= i_source_margin
;
1364 p_u
+= i_source_margin_c
;
1365 p_v
+= i_source_margin_c
;
1367 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1372 /* use slower SSE2 unaligned fetch and store */
1373 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
1375 p_pic_start
= p_pic
;
1376 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1378 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 16; i_x
--; )
1381 SSE2_INIT_32_UNALIGNED
1384 SSE2_UNPACK_32_ABGR_UNALIGNED
1392 /* Here we do some unaligned reads and duplicate conversions, but
1393 * at least we have all the pixels */
1397 p_u
-= i_rewind
>> 1;
1398 p_v
-= i_rewind
>> 1;
1399 p_buffer
-= i_rewind
;
1401 SSE2_INIT_32_UNALIGNED
1404 SSE2_UNPACK_32_ABGR_UNALIGNED
1411 SCALE_HEIGHT( 420, 4 );
1413 p_y
+= i_source_margin
;
1416 p_u
+= i_source_margin_c
;
1417 p_v
+= i_source_margin_c
;
1419 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1425 i_rewind
= (-(p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
)) & 7;
1427 for( i_y
= 0; i_y
< (p_filter
->fmt_in
.video
.i_y_offset
+ p_filter
->fmt_in
.video
.i_visible_height
); i_y
++ )
1429 p_pic_start
= p_pic
;
1430 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1432 for ( i_x
= (p_filter
->fmt_in
.video
.i_x_offset
+ p_filter
->fmt_in
.video
.i_visible_width
) / 8; i_x
--; )
1446 /* Here we do some unaligned reads and duplicate conversions, but
1447 * at least we have all the pixels */
1451 p_u
-= i_rewind
>> 1;
1452 p_v
-= i_rewind
>> 1;
1453 p_buffer
-= i_rewind
;
1466 SCALE_HEIGHT( 420, 4 );
1468 p_y
+= i_source_margin
;
1471 p_u
+= i_source_margin_c
;
1472 p_v
+= i_source_margin_c
;
1476 /* re-enable FPU registers */