1 /*****************************************************************************
2 * i420_rgb16.c : YUV to bitmap RGB conversion module for vlc
3 *****************************************************************************
4 * Copyright (C) 2000 the VideoLAN team
7 * Authors: Samuel Hocevar <sam@zoy.org>
8 * Damien Fouilleul <damienf@videolan.org>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
23 *****************************************************************************/
25 /*****************************************************************************
27 *****************************************************************************/
33 #include <vlc_common.h>
34 #include <vlc_filter.h>
37 #if defined (MODULE_NAME_IS_i420_rgb)
38 # include "i420_rgb_c.h"
39 #elif defined (MODULE_NAME_IS_i420_rgb_mmx)
40 # include "../mmx/i420_rgb_mmx.h"
41 #elif defined (MODULE_NAME_IS_i420_rgb_sse2)
42 # include "../mmx/i420_rgb_mmx.h"
45 static void SetOffset( int, int, int, int, bool *,
46 unsigned int *, int * );
48 #if defined (MODULE_NAME_IS_i420_rgb)
49 /*****************************************************************************
50 * I420_RGB16_dither: color YUV 4:2:0 to RGB 16 bpp with dithering
51 *****************************************************************************
52 * Horizontal alignment needed:
53 * - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
54 * - output: 1 pixel (2 bytes), margins allowed
55 * Vertical alignment needed:
56 * - input: 2 lines (2 Y lines, 1 U/V line)
58 *****************************************************************************/
/* Convert the planar YUV 4:2:0 picture p_src into the 16 bpp RGB picture
 * p_dest, with dithering.  Picture dimensions are read from
 * p_filter->fmt_in / fmt_out; the conversion table, the conversion buffer
 * and the offset array are taken from p_filter->p_sys. */
59 void I420_RGB16_dither( filter_t
*p_filter
, picture_t
*p_src
,
62 /* We got this one from the old arguments */
63 uint16_t *p_pic
= (uint16_t*)p_dest
->p
->p_pixels
;
64 uint8_t *p_y
= p_src
->Y_PIXELS
;
65 uint8_t *p_u
= p_src
->U_PIXELS
;
66 uint8_t *p_v
= p_src
->V_PIXELS
;
68 bool b_hscale
; /* horizontal scaling type */
69 unsigned int i_vscale
; /* vertical scaling type */
70 unsigned int i_x
, i_y
; /* horizontal and vertical indexes */
71 unsigned int i_real_y
; /* y % 4 */
75 int i_scale_count
; /* scale modulo counter */
76 int i_chroma_width
= p_filter
->fmt_in
.video
.i_width
/ 2; /* chroma width */
77 uint16_t * p_pic_start
; /* beginning of the current line for copy */
78 int i_uval
, i_vval
; /* U and V samples */
79 int i_red
, i_green
, i_blue
; /* U and V modified samples */
80 uint16_t * p_yuv
= p_filter
->p_sys
->p_rgb16
;
81 uint16_t * p_ybase
; /* Y dependent conversion table */
83 /* Conversion buffer pointer */
84 uint16_t * p_buffer_start
= (uint16_t*)p_filter
->p_sys
->p_buffer
;
87 /* Offset array pointer */
88 int * p_offset_start
= p_filter
->p_sys
->p_offset
;
/* Bytes left over at the end of each source line (pitch minus visible
 * pitch), for plane 0 and for plane 1.  They are added to p_y and to
 * p_u / p_v respectively at the end of the per-line loop. */
91 const int i_source_margin
= p_src
->p
[0].i_pitch
92 - p_src
->p
[0].i_visible_pitch
;
93 const int i_source_margin_c
= p_src
->p
[1].i_pitch
94 - p_src
->p
[1].i_visible_pitch
;
96 /* The dithering matrices */
97 int dither10
[4] = { 0x0, 0x8, 0x2, 0xa };
98 int dither11
[4] = { 0xc, 0x4, 0xe, 0x6 };
99 int dither12
[4] = { 0x3, 0xb, 0x1, 0x9 };
100 int dither13
[4] = { 0xf, 0x7, 0xd, 0x5 };
/* Pre-shift every 4-bit dither value left by (SHIFT - 4 + i_rrshift) bits,
 * once per call, so the shifted values can be used directly afterwards. */
102 for(i_x
= 0; i_x
< 4; i_x
++)
104 dither10
[i_x
] = dither10
[i_x
] << (SHIFT
- 4 + p_filter
->fmt_out
.video
.i_rrshift
);
105 dither11
[i_x
] = dither11
[i_x
] << (SHIFT
- 4 + p_filter
->fmt_out
.video
.i_rrshift
);
106 dither12
[i_x
] = dither12
[i_x
] << (SHIFT
- 4 + p_filter
->fmt_out
.video
.i_rrshift
);
107 dither13
[i_x
] = dither13
[i_x
] << (SHIFT
- 4 + p_filter
->fmt_out
.video
.i_rrshift
);
/* Bytes left over at the end of each destination line */
110 i_right_margin
= p_dest
->p
->i_pitch
- p_dest
->p
->i_visible_pitch
;
/* When the width is not a multiple of 8, i_rewind is the number of pixels
 * missing from the last group of 8; the pointers are stepped back by that
 * amount below so that a full group can still be converted. */
112 if( p_filter
->fmt_in
.video
.i_width
& 7 )
114 i_rewind
= 8 - ( p_filter
->fmt_in
.video
.i_width
& 7 );
121 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
122 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
123 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
/* SetOffset() receives the input and output dimensions; b_hscale and
 * i_vscale are only read after this call, so it is expected to set them
 * and to fill the offset array. */
124 SetOffset( p_filter
->fmt_in
.video
.i_width
,
125 p_filter
->fmt_in
.video
.i_height
,
126 p_filter
->fmt_out
.video
.i_width
,
127 p_filter
->fmt_out
.video
.i_height
,
128 &b_hscale
, &i_vscale
, p_offset_start
);
/* The scale counter starts at the output height when i_vscale == 1 and at
 * the input height otherwise. */
133 i_scale_count
= ( i_vscale
== 1 ) ?
134 p_filter
->fmt_out
.video
.i_height
:
135 p_filter
->fmt_in
.video
.i_height
;
/* One iteration per source line */
136 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
/* NOTE(review): presumably consumed by the *_DITHER macros to pick a
 * dither entry for this line - confirm in i420_rgb_c.h */
138 i_real_y
= i_y
& 0x3;
/* Write into the conversion buffer when horizontal scaling is enabled,
 * otherwise straight into the destination picture. */
140 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
/* Main loop: width / 8 iterations, each expanding eight conversion macros
 * (alternating the YUV and the Y-only variants). */
142 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 8; i_x
--; )
144 int *p_dither
= dither10
;
145 CONVERT_YUV_PIXEL_DITHER(2);
147 CONVERT_Y_PIXEL_DITHER(2);
149 CONVERT_YUV_PIXEL_DITHER(2);
151 CONVERT_Y_PIXEL_DITHER(2);
153 CONVERT_YUV_PIXEL_DITHER(2);
155 CONVERT_Y_PIXEL_DITHER(2);
157 CONVERT_YUV_PIXEL_DITHER(2);
159 CONVERT_Y_PIXEL_DITHER(2);
162 /* Here we do some unaligned reads and duplicate conversions, but
163 * at least we have all the pixels */
166 int *p_dither
= dither10
;
/* Step the chroma and output pointers back; the chroma pointers move half
 * as far (i_rewind >> 1) as the output pointer. */
168 p_u
-= i_rewind
>> 1;
169 p_v
-= i_rewind
>> 1;
170 p_buffer
-= i_rewind
;
171 CONVERT_YUV_PIXEL_DITHER(2);
173 CONVERT_Y_PIXEL_DITHER(2);
175 CONVERT_YUV_PIXEL_DITHER(2);
177 CONVERT_Y_PIXEL_DITHER(2);
179 CONVERT_YUV_PIXEL_DITHER(2);
181 CONVERT_Y_PIXEL_DITHER(2);
183 CONVERT_YUV_PIXEL_DITHER(2);
185 CONVERT_Y_PIXEL_DITHER(2);
188 SCALE_HEIGHT( 420, 2 );
/* Skip the padding at the end of the source lines */
190 p_y
+= i_source_margin
;
193 p_u
+= i_source_margin_c
;
194 p_v
+= i_source_margin_c
;
200 /*****************************************************************************
201 * I420_RGB16: color YUV 4:2:0 to RGB 16 bpp
202 *****************************************************************************
203 * Horizontal alignment needed:
204 * - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
205 * - output: 1 pixel (2 bytes), margins allowed
206 * Vertical alignment needed:
207 * - input: 2 lines (2 Y lines, 1 U/V line)
209 *****************************************************************************/
211 #if defined (MODULE_NAME_IS_i420_rgb)
/* Convert the planar YUV 4:2:0 picture p_src into the 16 bpp RGB picture
 * p_dest (no dithering).  Picture dimensions are read from
 * p_filter->fmt_in / fmt_out; the conversion table, the conversion buffer
 * and the offset array are taken from p_filter->p_sys. */
213 void I420_RGB16( filter_t
*p_filter
, picture_t
*p_src
,
216 /* We got this one from the old arguments */
217 uint16_t *p_pic
= (uint16_t*)p_dest
->p
->p_pixels
;
218 uint8_t *p_y
= p_src
->Y_PIXELS
;
219 uint8_t *p_u
= p_src
->U_PIXELS
;
220 uint8_t *p_v
= p_src
->V_PIXELS
;
222 bool b_hscale
; /* horizontal scaling type */
223 unsigned int i_vscale
; /* vertical scaling type */
224 unsigned int i_x
, i_y
; /* horizontal and vertical indexes */
228 int i_scale_count
; /* scale modulo counter */
229 int i_chroma_width
= p_filter
->fmt_in
.video
.i_width
/ 2; /* chroma width */
230 uint16_t * p_pic_start
; /* beginning of the current line for copy */
231 int i_uval
, i_vval
; /* U and V samples */
232 int i_red
, i_green
, i_blue
; /* U and V modified samples */
233 uint16_t * p_yuv
= p_filter
->p_sys
->p_rgb16
;
234 uint16_t * p_ybase
; /* Y dependent conversion table */
236 /* Conversion buffer pointer */
237 uint16_t * p_buffer_start
= (uint16_t*)p_filter
->p_sys
->p_buffer
;
240 /* Offset array pointer */
241 int * p_offset_start
= p_filter
->p_sys
->p_offset
;
/* Bytes left over at the end of each source line (pitch minus visible
 * pitch), for plane 0 and for plane 1.  They are added to p_y and to
 * p_u / p_v respectively at the end of the per-line loop. */
244 const int i_source_margin
= p_src
->p
[0].i_pitch
245 - p_src
->p
[0].i_visible_pitch
;
246 const int i_source_margin_c
= p_src
->p
[1].i_pitch
247 - p_src
->p
[1].i_visible_pitch
;
/* Bytes left over at the end of each destination line */
249 i_right_margin
= p_dest
->p
->i_pitch
- p_dest
->p
->i_visible_pitch
;
/* When the width is not a multiple of 8, i_rewind is the number of pixels
 * missing from the last group of 8; the pointers are stepped back by that
 * amount below so that a full group can still be converted. */
251 if( p_filter
->fmt_in
.video
.i_width
& 7 )
253 i_rewind
= 8 - ( p_filter
->fmt_in
.video
.i_width
& 7 );
260 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
261 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
262 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
/* SetOffset() receives the input and output dimensions; b_hscale and
 * i_vscale are only read after this call, so it is expected to set them
 * and to fill the offset array. */
263 SetOffset( p_filter
->fmt_in
.video
.i_width
,
264 p_filter
->fmt_in
.video
.i_height
,
265 p_filter
->fmt_out
.video
.i_width
,
266 p_filter
->fmt_out
.video
.i_height
,
267 &b_hscale
, &i_vscale
, p_offset_start
);
/* The scale counter starts at the output height when i_vscale == 1 and at
 * the input height otherwise. */
272 i_scale_count
= ( i_vscale
== 1 ) ?
273 p_filter
->fmt_out
.video
.i_height
:
274 p_filter
->fmt_in
.video
.i_height
;
/* One iteration per source line */
275 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
/* Write into the conversion buffer when horizontal scaling is enabled,
 * otherwise straight into the destination picture. */
278 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
/* Main loop: width / 8 iterations, each expanding eight conversion macros
 * (alternating the YUV and the Y-only variants). */
280 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 8; i_x
--; )
282 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
283 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
284 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
285 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
288 /* Here we do some unaligned reads and duplicate conversions, but
289 * at least we have all the pixels */
/* Step the chroma and output pointers back; the chroma pointers move half
 * as far (i_rewind >> 1) as the output pointer. */
293 p_u
-= i_rewind
>> 1;
294 p_v
-= i_rewind
>> 1;
295 p_buffer
-= i_rewind
;
297 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
298 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
299 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
300 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
303 SCALE_HEIGHT( 420, 2 );
/* Skip the padding at the end of the source lines */
305 p_y
+= i_source_margin
;
308 p_u
+= i_source_margin_c
;
309 p_v
+= i_source_margin_c
;
314 #else // ! defined (MODULE_NAME_IS_i420_rgb)
/* Convert the planar YUV 4:2:0 picture p_src into the 16 bpp picture
 * p_dest.  This function is compiled only when MODULE_NAME_IS_i420_rgb is
 * not defined.  Its body has two compile-time variants: an SSE2 one whose
 * inner loop runs width / 16 times per line, and an MMX one whose inner
 * loop runs width / 8 times per line. */
316 void I420_R5G5B5( filter_t
*p_filter
, picture_t
*p_src
,
319 /* We got this one from the old arguments */
320 uint16_t *p_pic
= (uint16_t*)p_dest
->p
->p_pixels
;
321 uint8_t *p_y
= p_src
->Y_PIXELS
;
322 uint8_t *p_u
= p_src
->U_PIXELS
;
323 uint8_t *p_v
= p_src
->V_PIXELS
;
325 bool b_hscale
; /* horizontal scaling type */
326 unsigned int i_vscale
; /* vertical scaling type */
327 unsigned int i_x
, i_y
; /* horizontal and vertical indexes */
331 int i_scale_count
; /* scale modulo counter */
332 int i_chroma_width
= p_filter
->fmt_in
.video
.i_width
/ 2; /* chroma width */
333 uint16_t * p_pic_start
; /* beginning of the current line for copy */
335 /* Conversion buffer pointer */
336 uint16_t * p_buffer_start
= (uint16_t*)p_filter
->p_sys
->p_buffer
;
339 /* Offset array pointer */
340 int * p_offset_start
= p_filter
->p_sys
->p_offset
;
/* Bytes left over at the end of each source line (pitch minus visible
 * pitch), for plane 0 and for plane 1.  They are added to p_y and to
 * p_u / p_v respectively at the end of the per-line loops. */
343 const int i_source_margin
= p_src
->p
[0].i_pitch
344 - p_src
->p
[0].i_visible_pitch
;
345 const int i_source_margin_c
= p_src
->p
[1].i_pitch
346 - p_src
->p
[1].i_visible_pitch
;
/* Bytes left over at the end of each destination line */
348 i_right_margin
= p_dest
->p
->i_pitch
- p_dest
->p
->i_visible_pitch
;
350 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
351 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
352 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
/* SetOffset() receives the input and output dimensions; b_hscale and
 * i_vscale are only read after this call, so it is expected to set them
 * and to fill the offset array. */
353 SetOffset( p_filter
->fmt_in
.video
.i_width
,
354 p_filter
->fmt_in
.video
.i_height
,
355 p_filter
->fmt_out
.video
.i_width
,
356 p_filter
->fmt_out
.video
.i_height
,
357 &b_hscale
, &i_vscale
, p_offset_start
);
/* The scale counter starts at the output height when i_vscale == 1 and at
 * the input height otherwise. */
363 i_scale_count
= ( i_vscale
== 1 ) ?
364 p_filter
->fmt_out
.video
.i_height
:
365 p_filter
->fmt_in
.video
.i_height
;
367 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
/* SSE2 variant: when the width is not a multiple of 16, i_rewind is the
 * number of pixels missing from the last group of 16. */
369 if( p_filter
->fmt_in
.video
.i_width
& 15 )
371 i_rewind
= 16 - ( p_filter
->fmt_in
.video
.i_width
& 15 );
379 ** SSE2 128 bits fetch/store instructions are faster
380 ** if memory access is 16 bytes aligned
383 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
/* The aligned path is taken only when the low four bits of every value
 * OR-ed together here (Y pitch, output pointer, ...) are zero. */
384 if( 0 == (15 & (p_src
->p
[Y_PLANE
].i_pitch
|
387 ((intptr_t)p_buffer
))) )
389 /* use faster SSE2 aligned fetch and store */
390 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
394 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/16; i_x
--; )
400 SSE2_UNPACK_15_ALIGNED
407 /* Here we do some unaligned reads and duplicate conversions, but
408 * at least we have all the pixels */
/* Step the chroma and output pointers back; the chroma pointers move half
 * as far (i_rewind >> 1) as the output pointer.  The rewound position is
 * handled with the unaligned macros. */
412 p_u
-= i_rewind
>> 1;
413 p_v
-= i_rewind
>> 1;
414 p_buffer
-= i_rewind
;
417 SSE2_INIT_16_UNALIGNED
420 SSE2_UNPACK_15_UNALIGNED
427 SCALE_HEIGHT( 420, 2 );
/* Skip the padding at the end of the source lines */
429 p_y
+= i_source_margin
;
432 p_u
+= i_source_margin_c
;
433 p_v
+= i_source_margin_c
;
435 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
440 /* use slower SSE2 unaligned fetch and store */
441 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
444 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
446 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/16; i_x
--; )
449 SSE2_INIT_16_UNALIGNED
452 SSE2_UNPACK_15_UNALIGNED
459 /* Here we do some unaligned reads and duplicate conversions, but
460 * at least we have all the pixels */
464 p_u
-= i_rewind
>> 1;
465 p_v
-= i_rewind
>> 1;
466 p_buffer
-= i_rewind
;
469 SSE2_INIT_16_UNALIGNED
472 SSE2_UNPACK_15_UNALIGNED
479 SCALE_HEIGHT( 420, 2 );
/* Skip the padding at the end of the source lines */
481 p_y
+= i_source_margin
;
484 p_u
+= i_source_margin_c
;
485 p_v
+= i_source_margin_c
;
487 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
491 /* make sure all SSE2 stores are visible thereafter */
494 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
/* MMX variant: when the width is not a multiple of 8, i_rewind is the
 * number of pixels missing from the last group of 8. */
496 if( p_filter
->fmt_in
.video
.i_width
& 7 )
498 i_rewind
= 8 - ( p_filter
->fmt_in
.video
.i_width
& 7 );
/* One iteration per source line */
505 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
/* Write into the conversion buffer when horizontal scaling is enabled,
 * otherwise straight into the destination picture. */
508 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
510 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 8; i_x
--; )
524 /* Here we do some unaligned reads and duplicate conversions, but
525 * at least we have all the pixels */
529 p_u
-= i_rewind
>> 1;
530 p_v
-= i_rewind
>> 1;
531 p_buffer
-= i_rewind
;
545 SCALE_HEIGHT( 420, 2 );
/* Skip the padding at the end of the source lines */
547 p_y
+= i_source_margin
;
550 p_u
+= i_source_margin_c
;
551 p_v
+= i_source_margin_c
;
554 /* re-enable FPU registers */
/* Convert the planar YUV 4:2:0 picture p_src into the 16 bpp picture
 * p_dest.  Same structure as I420_R5G5B5, but the SSE2 variant uses the
 * SSE2_UNPACK_16_* macros.  The body has two compile-time variants: an
 * SSE2 one whose inner loop runs width / 16 times per line, and an MMX one
 * whose inner loop runs width / 8 times per line. */
560 void I420_R5G6B5( filter_t
*p_filter
, picture_t
*p_src
,
563 /* We got this one from the old arguments */
564 uint16_t *p_pic
= (uint16_t*)p_dest
->p
->p_pixels
;
565 uint8_t *p_y
= p_src
->Y_PIXELS
;
566 uint8_t *p_u
= p_src
->U_PIXELS
;
567 uint8_t *p_v
= p_src
->V_PIXELS
;
569 bool b_hscale
; /* horizontal scaling type */
570 unsigned int i_vscale
; /* vertical scaling type */
571 unsigned int i_x
, i_y
; /* horizontal and vertical indexes */
575 int i_scale_count
; /* scale modulo counter */
576 int i_chroma_width
= p_filter
->fmt_in
.video
.i_width
/ 2; /* chroma width */
577 uint16_t * p_pic_start
; /* beginning of the current line for copy */
579 /* Conversion buffer pointer */
580 uint16_t * p_buffer_start
= (uint16_t*)p_filter
->p_sys
->p_buffer
;
583 /* Offset array pointer */
584 int * p_offset_start
= p_filter
->p_sys
->p_offset
;
/* Bytes left over at the end of each source line (pitch minus visible
 * pitch), for plane 0 and for plane 1.  They are added to p_y and to
 * p_u / p_v respectively at the end of the per-line loops. */
587 const int i_source_margin
= p_src
->p
[0].i_pitch
588 - p_src
->p
[0].i_visible_pitch
;
589 const int i_source_margin_c
= p_src
->p
[1].i_pitch
590 - p_src
->p
[1].i_visible_pitch
;
/* Bytes left over at the end of each destination line */
592 i_right_margin
= p_dest
->p
->i_pitch
- p_dest
->p
->i_visible_pitch
;
594 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
595 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
596 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
/* SetOffset() receives the input and output dimensions; b_hscale and
 * i_vscale are only read after this call, so it is expected to set them
 * and to fill the offset array. */
597 SetOffset( p_filter
->fmt_in
.video
.i_width
,
598 p_filter
->fmt_in
.video
.i_height
,
599 p_filter
->fmt_out
.video
.i_width
,
600 p_filter
->fmt_out
.video
.i_height
,
601 &b_hscale
, &i_vscale
, p_offset_start
);
/* The scale counter starts at the output height when i_vscale == 1 and at
 * the input height otherwise. */
607 i_scale_count
= ( i_vscale
== 1 ) ?
608 p_filter
->fmt_out
.video
.i_height
:
609 p_filter
->fmt_in
.video
.i_height
;
611 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
/* SSE2 variant: when the width is not a multiple of 16, i_rewind is the
 * number of pixels missing from the last group of 16. */
613 if( p_filter
->fmt_in
.video
.i_width
& 15 )
615 i_rewind
= 16 - ( p_filter
->fmt_in
.video
.i_width
& 15 );
623 ** SSE2 128 bits fetch/store instructions are faster
624 ** if memory access is 16 bytes aligned
627 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
/* The aligned path is taken only when the low four bits of every value
 * OR-ed together here (Y pitch, output pointer, ...) are zero. */
628 if( 0 == (15 & (p_src
->p
[Y_PLANE
].i_pitch
|
631 ((intptr_t)p_buffer
))) )
633 /* use faster SSE2 aligned fetch and store */
634 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
638 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/16; i_x
--; )
644 SSE2_UNPACK_16_ALIGNED
651 /* Here we do some unaligned reads and duplicate conversions, but
652 * at least we have all the pixels */
/* Step the chroma and output pointers back; the chroma pointers move half
 * as far (i_rewind >> 1) as the output pointer.  The rewound position is
 * handled with the unaligned macros. */
656 p_u
-= i_rewind
>> 1;
657 p_v
-= i_rewind
>> 1;
658 p_buffer
-= i_rewind
;
661 SSE2_INIT_16_UNALIGNED
664 SSE2_UNPACK_16_UNALIGNED
671 SCALE_HEIGHT( 420, 2 );
/* Skip the padding at the end of the source lines */
673 p_y
+= i_source_margin
;
676 p_u
+= i_source_margin_c
;
677 p_v
+= i_source_margin_c
;
679 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
684 /* use slower SSE2 unaligned fetch and store */
685 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
688 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
690 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/16; i_x
--; )
693 SSE2_INIT_16_UNALIGNED
696 SSE2_UNPACK_16_UNALIGNED
703 /* Here we do some unaligned reads and duplicate conversions, but
704 * at least we have all the pixels */
708 p_u
-= i_rewind
>> 1;
709 p_v
-= i_rewind
>> 1;
710 p_buffer
-= i_rewind
;
713 SSE2_INIT_16_UNALIGNED
716 SSE2_UNPACK_16_UNALIGNED
723 SCALE_HEIGHT( 420, 2 );
/* Skip the padding at the end of the source lines */
725 p_y
+= i_source_margin
;
728 p_u
+= i_source_margin_c
;
729 p_v
+= i_source_margin_c
;
731 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
735 /* make sure all SSE2 stores are visible thereafter */
738 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
/* MMX variant: when the width is not a multiple of 8, i_rewind is the
 * number of pixels missing from the last group of 8. */
740 if( p_filter
->fmt_in
.video
.i_width
& 7 )
742 i_rewind
= 8 - ( p_filter
->fmt_in
.video
.i_width
& 7 );
/* One iteration per source line */
749 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
/* Write into the conversion buffer when horizontal scaling is enabled,
 * otherwise straight into the destination picture. */
752 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
754 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 8; i_x
--; )
768 /* Here we do some unaligned reads and duplicate conversions, but
769 * at least we have all the pixels */
773 p_u
-= i_rewind
>> 1;
774 p_v
-= i_rewind
>> 1;
775 p_buffer
-= i_rewind
;
789 SCALE_HEIGHT( 420, 2 );
/* Skip the padding at the end of the source lines */
791 p_y
+= i_source_margin
;
794 p_u
+= i_source_margin_c
;
795 p_v
+= i_source_margin_c
;
798 /* re-enable FPU registers */
806 /*****************************************************************************
807 * I420_RGB32: color YUV 4:2:0 to RGB 32 bpp
808 *****************************************************************************
809 * Horizontal alignment needed:
810 * - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
811 * - output: 1 pixel (4 bytes), margins allowed
812 * Vertical alignment needed:
813 * - input: 2 lines (2 Y lines, 1 U/V line)
815 *****************************************************************************/
817 #if defined (MODULE_NAME_IS_i420_rgb)
/* Convert the planar YUV 4:2:0 picture p_src into the 32 bpp RGB picture
 * p_dest (output pixels are written as uint32_t).  Picture dimensions are
 * read from p_filter->fmt_in / fmt_out; the conversion table, the
 * conversion buffer and the offset array are taken from p_filter->p_sys. */
819 void I420_RGB32( filter_t
*p_filter
, picture_t
*p_src
,
822 /* We got this one from the old arguments */
823 uint32_t *p_pic
= (uint32_t*)p_dest
->p
->p_pixels
;
824 uint8_t *p_y
= p_src
->Y_PIXELS
;
825 uint8_t *p_u
= p_src
->U_PIXELS
;
826 uint8_t *p_v
= p_src
->V_PIXELS
;
828 bool b_hscale
; /* horizontal scaling type */
829 unsigned int i_vscale
; /* vertical scaling type */
830 unsigned int i_x
, i_y
; /* horizontal and vertical indexes */
834 int i_scale_count
; /* scale modulo counter */
835 int i_chroma_width
= p_filter
->fmt_in
.video
.i_width
/ 2; /* chroma width */
836 uint32_t * p_pic_start
; /* beginning of the current line for copy */
837 int i_uval
, i_vval
; /* U and V samples */
838 int i_red
, i_green
, i_blue
; /* U and V modified samples */
839 uint32_t * p_yuv
= p_filter
->p_sys
->p_rgb32
;
840 uint32_t * p_ybase
; /* Y dependent conversion table */
842 /* Conversion buffer pointer */
843 uint32_t * p_buffer_start
= (uint32_t*)p_filter
->p_sys
->p_buffer
;
846 /* Offset array pointer */
847 int * p_offset_start
= p_filter
->p_sys
->p_offset
;
/* Bytes left over at the end of each source line (pitch minus visible
 * pitch), for plane 0 and for plane 1.  They are added to p_y and to
 * p_u / p_v respectively at the end of the per-line loop. */
850 const int i_source_margin
= p_src
->p
[0].i_pitch
851 - p_src
->p
[0].i_visible_pitch
;
852 const int i_source_margin_c
= p_src
->p
[1].i_pitch
853 - p_src
->p
[1].i_visible_pitch
;
/* Bytes left over at the end of each destination line */
855 i_right_margin
= p_dest
->p
->i_pitch
- p_dest
->p
->i_visible_pitch
;
/* When the width is not a multiple of 8, i_rewind is the number of pixels
 * missing from the last group of 8; the pointers are stepped back by that
 * amount below so that a full group can still be converted. */
857 if( p_filter
->fmt_in
.video
.i_width
& 7 )
859 i_rewind
= 8 - ( p_filter
->fmt_in
.video
.i_width
& 7 );
866 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
867 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
868 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
/* SetOffset() receives the input and output dimensions; b_hscale and
 * i_vscale are only read after this call, so it is expected to set them
 * and to fill the offset array. */
869 SetOffset( p_filter
->fmt_in
.video
.i_width
,
870 p_filter
->fmt_in
.video
.i_height
,
871 p_filter
->fmt_out
.video
.i_width
,
872 p_filter
->fmt_out
.video
.i_height
,
873 &b_hscale
, &i_vscale
, p_offset_start
);
/* The scale counter starts at the output height when i_vscale == 1 and at
 * the input height otherwise. */
878 i_scale_count
= ( i_vscale
== 1 ) ?
879 p_filter
->fmt_out
.video
.i_height
:
880 p_filter
->fmt_in
.video
.i_height
;
/* One iteration per source line */
881 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
/* Write into the conversion buffer when horizontal scaling is enabled,
 * otherwise straight into the destination picture. */
884 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
/* Main loop: width / 8 iterations, each expanding eight conversion macros
 * (alternating the YUV and the Y-only variants). */
886 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 8; i_x
--; )
888 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
889 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
890 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
891 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
894 /* Here we do some unaligned reads and duplicate conversions, but
895 * at least we have all the pixels */
/* Step the chroma and output pointers back; the chroma pointers move half
 * as far (i_rewind >> 1) as the output pointer. */
899 p_u
-= i_rewind
>> 1;
900 p_v
-= i_rewind
>> 1;
901 p_buffer
-= i_rewind
;
902 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
903 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
904 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
905 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
908 SCALE_HEIGHT( 420, 4 );
/* Skip the padding at the end of the source lines */
910 p_y
+= i_source_margin
;
913 p_u
+= i_source_margin_c
;
914 p_v
+= i_source_margin_c
;
919 #else // defined (MODULE_NAME_IS_i420_rgb_mmx) || defined (MODULE_NAME_IS_i420_rgb_sse2)
/* Convert the planar YUV 4:2:0 picture p_src into the 32 bpp picture
 * p_dest (output pixels are written as uint32_t).  This function is
 * compiled only when MODULE_NAME_IS_i420_rgb is not defined.  Its body has
 * two compile-time variants: an SSE2 one (SSE2_UNPACK_32_ARGB_* macros)
 * whose inner loop runs width / 16 times per line, and an MMX one whose
 * inner loop runs width / 8 times per line. */
921 void I420_A8R8G8B8( filter_t
*p_filter
, picture_t
*p_src
,
924 /* We got this one from the old arguments */
925 uint32_t *p_pic
= (uint32_t*)p_dest
->p
->p_pixels
;
926 uint8_t *p_y
= p_src
->Y_PIXELS
;
927 uint8_t *p_u
= p_src
->U_PIXELS
;
928 uint8_t *p_v
= p_src
->V_PIXELS
;
930 bool b_hscale
; /* horizontal scaling type */
931 unsigned int i_vscale
; /* vertical scaling type */
932 unsigned int i_x
, i_y
; /* horizontal and vertical indexes */
936 int i_scale_count
; /* scale modulo counter */
937 int i_chroma_width
= p_filter
->fmt_in
.video
.i_width
/ 2; /* chroma width */
938 uint32_t * p_pic_start
; /* beginning of the current line for copy */
939 /* Conversion buffer pointer */
940 uint32_t * p_buffer_start
= (uint32_t*)p_filter
->p_sys
->p_buffer
;
943 /* Offset array pointer */
944 int * p_offset_start
= p_filter
->p_sys
->p_offset
;
/* Bytes left over at the end of each source line (pitch minus visible
 * pitch), for plane 0 and for plane 1.  They are added to p_y and to
 * p_u / p_v respectively at the end of the per-line loops. */
947 const int i_source_margin
= p_src
->p
[0].i_pitch
948 - p_src
->p
[0].i_visible_pitch
;
949 const int i_source_margin_c
= p_src
->p
[1].i_pitch
950 - p_src
->p
[1].i_visible_pitch
;
/* Bytes left over at the end of each destination line */
952 i_right_margin
= p_dest
->p
->i_pitch
- p_dest
->p
->i_visible_pitch
;
954 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
955 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
956 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
/* SetOffset() receives the input and output dimensions; b_hscale and
 * i_vscale are only read after this call, so it is expected to set them
 * and to fill the offset array. */
957 SetOffset( p_filter
->fmt_in
.video
.i_width
,
958 p_filter
->fmt_in
.video
.i_height
,
959 p_filter
->fmt_out
.video
.i_width
,
960 p_filter
->fmt_out
.video
.i_height
,
961 &b_hscale
, &i_vscale
, p_offset_start
);
/* The scale counter starts at the output height when i_vscale == 1 and at
 * the input height otherwise. */
966 i_scale_count
= ( i_vscale
== 1 ) ?
967 p_filter
->fmt_out
.video
.i_height
:
968 p_filter
->fmt_in
.video
.i_height
;
970 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
/* SSE2 variant: when the width is not a multiple of 16, i_rewind is the
 * number of pixels missing from the last group of 16. */
972 if( p_filter
->fmt_in
.video
.i_width
& 15 )
974 i_rewind
= 16 - ( p_filter
->fmt_in
.video
.i_width
& 15 );
982 ** SSE2 128 bits fetch/store instructions are faster
983 ** if memory access is 16 bytes aligned
986 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
/* The aligned path is taken only when the low four bits of every value
 * OR-ed together here (Y pitch, output pointer, ...) are zero. */
987 if( 0 == (15 & (p_src
->p
[Y_PLANE
].i_pitch
|
990 ((intptr_t)p_buffer
))) )
992 /* use faster SSE2 aligned fetch and store */
993 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
997 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 16; i_x
--; )
1000 SSE2_INIT_32_ALIGNED
1003 SSE2_UNPACK_32_ARGB_ALIGNED
1011 /* Here we do some unaligned reads and duplicate conversions, but
1012 * at least we have all the pixels */
/* Step the chroma and output pointers back; the chroma pointers move half
 * as far (i_rewind >> 1) as the output pointer.  The rewound position is
 * handled with the unaligned macros. */
1016 p_u
-= i_rewind
>> 1;
1017 p_v
-= i_rewind
>> 1;
1018 p_buffer
-= i_rewind
;
1020 SSE2_INIT_32_UNALIGNED
1023 SSE2_UNPACK_32_ARGB_UNALIGNED
1030 SCALE_HEIGHT( 420, 4 );
/* Skip the padding at the end of the source lines */
1032 p_y
+= i_source_margin
;
1035 p_u
+= i_source_margin_c
;
1036 p_v
+= i_source_margin_c
;
1038 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1043 /* use slower SSE2 unaligned fetch and store */
1044 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
/* Remember where this output line starts */
1046 p_pic_start
= p_pic
;
1047 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1049 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 16; i_x
--; )
1052 SSE2_INIT_32_UNALIGNED
1055 SSE2_UNPACK_32_ARGB_UNALIGNED
1063 /* Here we do some unaligned reads and duplicate conversions, but
1064 * at least we have all the pixels */
1068 p_u
-= i_rewind
>> 1;
1069 p_v
-= i_rewind
>> 1;
1070 p_buffer
-= i_rewind
;
1072 SSE2_INIT_32_UNALIGNED
1075 SSE2_UNPACK_32_ARGB_UNALIGNED
1082 SCALE_HEIGHT( 420, 4 );
/* Skip the padding at the end of the source lines */
1084 p_y
+= i_source_margin
;
1087 p_u
+= i_source_margin_c
;
1088 p_v
+= i_source_margin_c
;
1090 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1094 /* make sure all SSE2 stores are visible thereafter */
1097 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
/* MMX variant: when the width is not a multiple of 8, i_rewind is the
 * number of pixels missing from the last group of 8. */
1099 if( p_filter
->fmt_in
.video
.i_width
& 7 )
1101 i_rewind
= 8 - ( p_filter
->fmt_in
.video
.i_width
& 7 );
/* One iteration per source line */
1108 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
/* Remember where this output line starts */
1110 p_pic_start
= p_pic
;
/* Write into the conversion buffer when horizontal scaling is enabled,
 * otherwise straight into the destination picture. */
1111 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1113 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 8; i_x
--; )
1127 /* Here we do some unaligned reads and duplicate conversions, but
1128 * at least we have all the pixels */
1132 p_u
-= i_rewind
>> 1;
1133 p_v
-= i_rewind
>> 1;
1134 p_buffer
-= i_rewind
;
1147 SCALE_HEIGHT( 420, 4 );
/* Skip the padding at the end of the source lines */
1149 p_y
+= i_source_margin
;
1152 p_u
+= i_source_margin_c
;
1153 p_v
+= i_source_margin_c
;
1157 /* re-enable FPU registers */
/* Convert the planar YUV 4:2:0 picture p_src into the 32 bpp picture
 * p_dest (output pixels are written as uint32_t).  Same structure as
 * I420_A8R8G8B8, but the SSE2 variant uses the SSE2_UNPACK_32_RGBA_*
 * macros.  The body has two compile-time variants: an SSE2 one whose inner
 * loop runs width / 16 times per line, and an MMX one whose inner loop
 * runs width / 8 times per line. */
1163 void I420_R8G8B8A8( filter_t
*p_filter
, picture_t
*p_src
,
1166 /* We got this one from the old arguments */
1167 uint32_t *p_pic
= (uint32_t*)p_dest
->p
->p_pixels
;
1168 uint8_t *p_y
= p_src
->Y_PIXELS
;
1169 uint8_t *p_u
= p_src
->U_PIXELS
;
1170 uint8_t *p_v
= p_src
->V_PIXELS
;
1172 bool b_hscale
; /* horizontal scaling type */
1173 unsigned int i_vscale
; /* vertical scaling type */
1174 unsigned int i_x
, i_y
; /* horizontal and vertical indexes */
1178 int i_scale_count
; /* scale modulo counter */
1179 int i_chroma_width
= p_filter
->fmt_in
.video
.i_width
/ 2; /* chroma width */
1180 uint32_t * p_pic_start
; /* beginning of the current line for copy */
1181 /* Conversion buffer pointer */
1182 uint32_t * p_buffer_start
= (uint32_t*)p_filter
->p_sys
->p_buffer
;
/* Current write position: either inside the conversion buffer or inside
 * the destination picture, chosen per line below */
1183 uint32_t * p_buffer
;
1185 /* Offset array pointer */
1186 int * p_offset_start
= p_filter
->p_sys
->p_offset
;
/* Bytes left over at the end of each source line (pitch minus visible
 * pitch), for plane 0 and for plane 1.  They are added to p_y and to
 * p_u / p_v respectively at the end of the per-line loops. */
1189 const int i_source_margin
= p_src
->p
[0].i_pitch
1190 - p_src
->p
[0].i_visible_pitch
;
1191 const int i_source_margin_c
= p_src
->p
[1].i_pitch
1192 - p_src
->p
[1].i_visible_pitch
;
/* Bytes left over at the end of each destination line */
1194 i_right_margin
= p_dest
->p
->i_pitch
- p_dest
->p
->i_visible_pitch
;
1196 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
1197 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
1198 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
/* SetOffset() receives the input and output dimensions; b_hscale and
 * i_vscale are only read after this call, so it is expected to set them
 * and to fill the offset array. */
1199 SetOffset( p_filter
->fmt_in
.video
.i_width
,
1200 p_filter
->fmt_in
.video
.i_height
,
1201 p_filter
->fmt_out
.video
.i_width
,
1202 p_filter
->fmt_out
.video
.i_height
,
1203 &b_hscale
, &i_vscale
, p_offset_start
);
1206 * Perform conversion
/* The scale counter starts at the output height when i_vscale == 1 and at
 * the input height otherwise. */
1208 i_scale_count
= ( i_vscale
== 1 ) ?
1209 p_filter
->fmt_out
.video
.i_height
:
1210 p_filter
->fmt_in
.video
.i_height
;
1212 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
/* SSE2 variant: when the width is not a multiple of 16, i_rewind is the
 * number of pixels missing from the last group of 16. */
1214 if( p_filter
->fmt_in
.video
.i_width
& 15 )
1216 i_rewind
= 16 - ( p_filter
->fmt_in
.video
.i_width
& 15 );
1224 ** SSE2 128 bits fetch/store instructions are faster
1225 ** if memory access is 16 bytes aligned
1228 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
/* The aligned path is taken only when the low four bits of every value
 * OR-ed together here (Y pitch, output pointer, ...) are zero. */
1229 if( 0 == (15 & (p_src
->p
[Y_PLANE
].i_pitch
|
1232 ((intptr_t)p_buffer
))) )
1234 /* use faster SSE2 aligned fetch and store */
1235 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
/* Remember where this output line starts */
1237 p_pic_start
= p_pic
;
1239 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 16; i_x
--; )
1242 SSE2_INIT_32_ALIGNED
1245 SSE2_UNPACK_32_RGBA_ALIGNED
1253 /* Here we do some unaligned reads and duplicate conversions, but
1254 * at least we have all the pixels */
/* Step the chroma and output pointers back; the chroma pointers move half
 * as far (i_rewind >> 1) as the output pointer.  The rewound position is
 * handled with the unaligned macros. */
1258 p_u
-= i_rewind
>> 1;
1259 p_v
-= i_rewind
>> 1;
1260 p_buffer
-= i_rewind
;
1262 SSE2_INIT_32_UNALIGNED
1265 SSE2_UNPACK_32_RGBA_UNALIGNED
1272 SCALE_HEIGHT( 420, 4 );
/* Skip the padding at the end of the source lines */
1274 p_y
+= i_source_margin
;
1277 p_u
+= i_source_margin_c
;
1278 p_v
+= i_source_margin_c
;
1280 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1285 /* use slower SSE2 unaligned fetch and store */
1286 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
/* Remember where this output line starts */
1288 p_pic_start
= p_pic
;
1289 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1291 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 16; i_x
--; )
1294 SSE2_INIT_32_UNALIGNED
1297 SSE2_UNPACK_32_RGBA_UNALIGNED
1305 /* Here we do some unaligned reads and duplicate conversions, but
1306 * at least we have all the pixels */
1310 p_u
-= i_rewind
>> 1;
1311 p_v
-= i_rewind
>> 1;
1312 p_buffer
-= i_rewind
;
1314 SSE2_INIT_32_UNALIGNED
1317 SSE2_UNPACK_32_RGBA_UNALIGNED
1324 SCALE_HEIGHT( 420, 4 );
/* Skip the padding at the end of the source lines */
1326 p_y
+= i_source_margin
;
1329 p_u
+= i_source_margin_c
;
1330 p_v
+= i_source_margin_c
;
1332 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1336 /* make sure all SSE2 stores are visible thereafter */
1339 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
/* MMX variant: when the width is not a multiple of 8, i_rewind is the
 * number of pixels missing from the last group of 8. */
1341 if( p_filter
->fmt_in
.video
.i_width
& 7 )
1343 i_rewind
= 8 - ( p_filter
->fmt_in
.video
.i_width
& 7 );
/* One iteration per source line */
1350 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
/* Remember where this output line starts */
1352 p_pic_start
= p_pic
;
/* Write into the conversion buffer when horizontal scaling is enabled,
 * otherwise straight into the destination picture. */
1353 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1355 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 8; i_x
--; )
1369 /* Here we do some unaligned reads and duplicate conversions, but
1370 * at least we have all the pixels */
1374 p_u
-= i_rewind
>> 1;
1375 p_v
-= i_rewind
>> 1;
1376 p_buffer
-= i_rewind
;
1389 SCALE_HEIGHT( 420, 4 );
/* Skip the padding at the end of the source lines */
1391 p_y
+= i_source_margin
;
1394 p_u
+= i_source_margin_c
;
1395 p_v
+= i_source_margin_c
;
1399 /* re-enable FPU registers */
/*****************************************************************************
 * I420_B8G8R8A8: convert a planar YUV picture to 32 bpp pixels
 *****************************************************************************
 * p_filter supplies the input/output video dimensions (fmt_in / fmt_out) and,
 * through p_sys, the conversion buffer and the offset array.
 * p_src supplies the Y, U and V planes that are read.
 * The result is written as 32-bit words through p_dest->p->p_pixels (the
 * declaration of p_dest is not visible in this excerpt).
 *
 * SetOffset() is called first to obtain the horizontal and vertical scaling
 * indicators and to fill the offset array. The picture is then processed line
 * by line, i_y running over the input height. When built as the SSE2 module
 * the inner loop runs width / 16 times per line; the other visible path runs
 * width / 8 times per line.
 *
 * NOTE(review): the byte order is presumably B,G,R,A, going by the function
 * name and the SSE2_UNPACK_32_BGRA_* macros used below - confirm against the
 * macro definitions, which are not part of this file.
 *****************************************************************************/
1405 void I420_B8G8R8A8( filter_t
*p_filter
, picture_t
*p_src
,
1408 /* We got this one from the old arguments */
1409 uint32_t *p_pic
= (uint32_t*)p_dest
->p
->p_pixels
;
1410 uint8_t *p_y
= p_src
->Y_PIXELS
;
1411 uint8_t *p_u
= p_src
->U_PIXELS
;
1412 uint8_t *p_v
= p_src
->V_PIXELS
;
1414 bool b_hscale
; /* horizontal scaling type */
1415 unsigned int i_vscale
; /* vertical scaling type */
1416 unsigned int i_x
, i_y
; /* horizontal and vertical indexes */
1420 int i_scale_count
; /* scale modulo counter */
1421 int i_chroma_width
= p_filter
->fmt_in
.video
.i_width
/ 2; /* chroma width */
1422 uint32_t * p_pic_start
; /* beginning of the current line for copy */
1423 /* Conversion buffer pointer */
1424 uint32_t * p_buffer_start
= (uint32_t*)p_filter
->p_sys
->p_buffer
;
1425 uint32_t * p_buffer
;
1427 /* Offset array pointer */
1428 int * p_offset_start
= p_filter
->p_sys
->p_offset
;
/* Padding at the end of each source line (pitch minus visible pitch), taken
 * from plane 0 for i_source_margin and from plane 1 for i_source_margin_c */
1431 const int i_source_margin
= p_src
->p
[0].i_pitch
1432 - p_src
->p
[0].i_visible_pitch
;
1433 const int i_source_margin_c
= p_src
->p
[1].i_pitch
1434 - p_src
->p
[1].i_visible_pitch
;
/* Same quantity for the destination plane */
1436 i_right_margin
= p_dest
->p
->i_pitch
- p_dest
->p
->i_visible_pitch
;
1438 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
1439 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
1440 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
1441 SetOffset( p_filter
->fmt_in
.video
.i_width
,
1442 p_filter
->fmt_in
.video
.i_height
,
1443 p_filter
->fmt_out
.video
.i_width
,
1444 p_filter
->fmt_out
.video
.i_height
,
1445 &b_hscale
, &i_vscale
, p_offset_start
);
1448 * Perform conversion
1450 i_scale_count
= ( i_vscale
== 1 ) ?
1451 p_filter
->fmt_out
.video
.i_height
:
1452 p_filter
->fmt_in
.video
.i_height
;
1454 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
/* When the input width is not a multiple of 16, i_rewind is the number of
 * pixels missing to reach the next multiple; it is used further down to step
 * the pointers back before the last block of a line is converted */
1456 if( p_filter
->fmt_in
.video
.i_width
& 15 )
1458 i_rewind
= 16 - ( p_filter
->fmt_in
.video
.i_width
& 15 );
1466 ** SSE2 128 bits fetch/store instructions are faster
1467 ** if memory access is 16 bytes aligned
1470 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
/* The test below requires the low four bits of every OR-ed operand to be
 * zero; the Y plane pitch and p_buffer are the operands visible here */
1471 if( 0 == (15 & (p_src
->p
[Y_PLANE
].i_pitch
|
1474 ((intptr_t)p_buffer
))) )
1476 /* use faster SSE2 aligned fetch and store */
1477 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
1479 p_pic_start
= p_pic
;
1481 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 16; i_x
--; )
1484 SSE2_INIT_32_ALIGNED
1487 SSE2_UNPACK_32_BGRA_ALIGNED
1495 /* Here we do some unaligned reads and duplicate conversions, but
1496 * at least we have all the pixels */
1500 p_u
-= i_rewind
>> 1;
1501 p_v
-= i_rewind
>> 1;
1502 p_buffer
-= i_rewind
;
1504 SSE2_INIT_32_UNALIGNED
1507 SSE2_UNPACK_32_BGRA_UNALIGNED
1514 SCALE_HEIGHT( 420, 4 );
/* Step the plane pointers over the padding at the end of the source line */
1516 p_y
+= i_source_margin
;
1519 p_u
+= i_source_margin_c
;
1520 p_v
+= i_source_margin_c
;
/* Output pointer for the next line: the conversion buffer when b_hscale is
 * set, otherwise the destination picture itself */
1522 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1527 /* use slower SSE2 unaligned fetch and store */
1528 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
1530 p_pic_start
= p_pic
;
1531 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1533 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 16; i_x
--; )
1536 SSE2_INIT_32_UNALIGNED
1539 SSE2_UNPACK_32_BGRA_UNALIGNED
1547 /* Here we do some unaligned reads and duplicate conversions, but
1548 * at least we have all the pixels */
1552 p_u
-= i_rewind
>> 1;
1553 p_v
-= i_rewind
>> 1;
1554 p_buffer
-= i_rewind
;
1556 SSE2_INIT_32_UNALIGNED
1559 SSE2_UNPACK_32_BGRA_UNALIGNED
1566 SCALE_HEIGHT( 420, 4 );
1568 p_y
+= i_source_margin
;
1571 p_u
+= i_source_margin_c
;
1572 p_v
+= i_source_margin_c
;
1574 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
/* NOTE(review): what follows is presumably the non-SSE2 (MMX) branch - the
 * preprocessor line selecting it is not visible in this excerpt. It mirrors
 * the code above with blocks of 8: i_rewind is computed from width & 7 and
 * the inner loop runs width / 8 times per line */
1580 if( p_filter
->fmt_in
.video
.i_width
& 7 )
1582 i_rewind
= 8 - ( p_filter
->fmt_in
.video
.i_width
& 7 );
1589 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
1591 p_pic_start
= p_pic
;
1592 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1594 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 8; i_x
--; )
1608 /* Here we do some unaligned reads and duplicate conversions, but
1609 * at least we have all the pixels */
1613 p_u
-= i_rewind
>> 1;
1614 p_v
-= i_rewind
>> 1;
1615 p_buffer
-= i_rewind
;
1628 SCALE_HEIGHT( 420, 4 );
1630 p_y
+= i_source_margin
;
1633 p_u
+= i_source_margin_c
;
1634 p_v
+= i_source_margin_c
;
1638 /* re-enable FPU registers */
/*****************************************************************************
 * I420_A8B8G8R8: convert a planar YUV picture to 32 bpp pixels
 *****************************************************************************
 * p_filter supplies the input/output video dimensions (fmt_in / fmt_out) and,
 * through p_sys, the conversion buffer and the offset array.
 * p_src supplies the Y, U and V planes that are read.
 * The result is written as 32-bit words through p_dest->p->p_pixels (the
 * declaration of p_dest is not visible in this excerpt).
 *
 * SetOffset() is called first to obtain the horizontal and vertical scaling
 * indicators and to fill the offset array. The picture is then processed line
 * by line, i_y running over the input height. When built as the SSE2 module
 * the inner loop runs width / 16 times per line; the other visible path runs
 * width / 8 times per line.
 *
 * NOTE(review): the byte order is presumably A,B,G,R, going by the function
 * name and the SSE2_UNPACK_32_ABGR_* macros used below - confirm against the
 * macro definitions, which are not part of this file.
 *****************************************************************************/
1644 void I420_A8B8G8R8( filter_t
*p_filter
, picture_t
*p_src
,
1647 /* We got this one from the old arguments */
1648 uint32_t *p_pic
= (uint32_t*)p_dest
->p
->p_pixels
;
1649 uint8_t *p_y
= p_src
->Y_PIXELS
;
1650 uint8_t *p_u
= p_src
->U_PIXELS
;
1651 uint8_t *p_v
= p_src
->V_PIXELS
;
1653 bool b_hscale
; /* horizontal scaling type */
1654 unsigned int i_vscale
; /* vertical scaling type */
1655 unsigned int i_x
, i_y
; /* horizontal and vertical indexes */
1659 int i_scale_count
; /* scale modulo counter */
1660 int i_chroma_width
= p_filter
->fmt_in
.video
.i_width
/ 2; /* chroma width */
1661 uint32_t * p_pic_start
; /* beginning of the current line for copy */
1662 /* Conversion buffer pointer */
1663 uint32_t * p_buffer_start
= (uint32_t*)p_filter
->p_sys
->p_buffer
;
1664 uint32_t * p_buffer
;
1666 /* Offset array pointer */
1667 int * p_offset_start
= p_filter
->p_sys
->p_offset
;
/* Padding at the end of each source line (pitch minus visible pitch), taken
 * from plane 0 for i_source_margin and from plane 1 for i_source_margin_c */
1670 const int i_source_margin
= p_src
->p
[0].i_pitch
1671 - p_src
->p
[0].i_visible_pitch
;
1672 const int i_source_margin_c
= p_src
->p
[1].i_pitch
1673 - p_src
->p
[1].i_visible_pitch
;
/* Same quantity for the destination plane */
1675 i_right_margin
= p_dest
->p
->i_pitch
- p_dest
->p
->i_visible_pitch
;
1677 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
1678 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
1679 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
1680 SetOffset( p_filter
->fmt_in
.video
.i_width
,
1681 p_filter
->fmt_in
.video
.i_height
,
1682 p_filter
->fmt_out
.video
.i_width
,
1683 p_filter
->fmt_out
.video
.i_height
,
1684 &b_hscale
, &i_vscale
, p_offset_start
);
1687 * Perform conversion
1689 i_scale_count
= ( i_vscale
== 1 ) ?
1690 p_filter
->fmt_out
.video
.i_height
:
1691 p_filter
->fmt_in
.video
.i_height
;
1693 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
/* When the input width is not a multiple of 16, i_rewind is the number of
 * pixels missing to reach the next multiple; it is used further down to step
 * the pointers back before the last block of a line is converted */
1695 if( p_filter
->fmt_in
.video
.i_width
& 15 )
1697 i_rewind
= 16 - ( p_filter
->fmt_in
.video
.i_width
& 15 );
1705 ** SSE2 128 bits fetch/store instructions are faster
1706 ** if memory access is 16 bytes aligned
1709 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
/* The test below requires the low four bits of every OR-ed operand to be
 * zero; the Y plane pitch and p_buffer are the operands visible here */
1710 if( 0 == (15 & (p_src
->p
[Y_PLANE
].i_pitch
|
1713 ((intptr_t)p_buffer
))) )
1715 /* use faster SSE2 aligned fetch and store */
1716 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
1718 p_pic_start
= p_pic
;
1720 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 16; i_x
--; )
1723 SSE2_INIT_32_ALIGNED
1726 SSE2_UNPACK_32_ABGR_ALIGNED
1734 /* Here we do some unaligned reads and duplicate conversions, but
1735 * at least we have all the pixels */
1739 p_u
-= i_rewind
>> 1;
1740 p_v
-= i_rewind
>> 1;
1741 p_buffer
-= i_rewind
;
1743 SSE2_INIT_32_UNALIGNED
1746 SSE2_UNPACK_32_ABGR_UNALIGNED
1753 SCALE_HEIGHT( 420, 4 );
/* Step the plane pointers over the padding at the end of the source line */
1755 p_y
+= i_source_margin
;
1758 p_u
+= i_source_margin_c
;
1759 p_v
+= i_source_margin_c
;
/* Output pointer for the next line: the conversion buffer when b_hscale is
 * set, otherwise the destination picture itself */
1761 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1766 /* use slower SSE2 unaligned fetch and store */
1767 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
1769 p_pic_start
= p_pic
;
1770 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1772 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 16; i_x
--; )
1775 SSE2_INIT_32_UNALIGNED
1778 SSE2_UNPACK_32_ABGR_UNALIGNED
1786 /* Here we do some unaligned reads and duplicate conversions, but
1787 * at least we have all the pixels */
1791 p_u
-= i_rewind
>> 1;
1792 p_v
-= i_rewind
>> 1;
1793 p_buffer
-= i_rewind
;
1795 SSE2_INIT_32_UNALIGNED
1798 SSE2_UNPACK_32_ABGR_UNALIGNED
1805 SCALE_HEIGHT( 420, 4 );
1807 p_y
+= i_source_margin
;
1810 p_u
+= i_source_margin_c
;
1811 p_v
+= i_source_margin_c
;
1813 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
/* NOTE(review): what follows is presumably the non-SSE2 (MMX) branch - the
 * preprocessor line selecting it is not visible in this excerpt. It mirrors
 * the code above with blocks of 8: i_rewind is computed from width & 7 and
 * the inner loop runs width / 8 times per line */
1819 if( p_filter
->fmt_in
.video
.i_width
& 7 )
1821 i_rewind
= 8 - ( p_filter
->fmt_in
.video
.i_width
& 7 );
1828 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
1830 p_pic_start
= p_pic
;
1831 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1833 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 8; i_x
--; )
1847 /* Here we do some unaligned reads and duplicate conversions, but
1848 * at least we have all the pixels */
1852 p_u
-= i_rewind
>> 1;
1853 p_v
-= i_rewind
>> 1;
1854 p_buffer
-= i_rewind
;
1867 SCALE_HEIGHT( 420, 4 );
1869 p_y
+= i_source_margin
;
1872 p_u
+= i_source_margin_c
;
1873 p_v
+= i_source_margin_c
;
1877 /* re-enable FPU registers */
1885 /* Following functions are local */
1887 /*****************************************************************************
1888 * SetOffset: build offset array for conversion functions
1889 *****************************************************************************
1890 * This function will build an offset array used in later conversion functions.
1891 * It will also set horizontal and vertical scaling indicators.
1892 *****************************************************************************/
1893 static void SetOffset( int i_width
, int i_height
, int i_pic_width
,
1894 int i_pic_height
, bool *pb_hscale
,
1895 unsigned int *pi_vscale
, int *p_offset
)
1897 int i_x
; /* x position in destination */
1898 int i_scale_count
; /* modulo counter */
1901 * Prepare horizontal offset array
1903 if( i_pic_width
- i_width
== 0 )
1905 /* No horizontal scaling: YUV conversion is done directly to picture */
1908 else if( i_pic_width
- i_width
> 0 )
1910 /* Prepare scaling array for horizontal extension */
1912 i_scale_count
= i_pic_width
;
1913 for( i_x
= i_width
; i_x
--; )
1915 while( (i_scale_count
-= i_width
) > 0 )
1920 i_scale_count
+= i_pic_width
;
1923 else /* if( i_pic_width - i_width < 0 ) */
1925 /* Prepare scaling array for horizontal reduction */
1927 i_scale_count
= i_width
;
1928 for( i_x
= i_pic_width
; i_x
--; )
1931 while( (i_scale_count
-= i_pic_width
) > 0 )
1936 i_scale_count
+= i_width
;
1941 * Set vertical scaling indicator
1943 if( i_pic_height
- i_height
== 0 )
1947 else if( i_pic_height
- i_height
> 0 )
1951 else /* if( i_pic_height - i_height < 0 ) */