1 /*****************************************************************************
2 * i420_rgb16.c : YUV to bitmap RGB conversion module for vlc
3 *****************************************************************************
4 * Copyright (C) 2000 VLC authors and VideoLAN
7 * Authors: Samuel Hocevar <sam@zoy.org>
8 * Damien Fouilleul <damienf@videolan.org>
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU Lesser General Public License as published by
12 * the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this program; if not, write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
23 *****************************************************************************/
25 /*****************************************************************************
27 *****************************************************************************/
33 #include <vlc_common.h>
34 #include <vlc_filter.h>
38 #if defined (MODULE_NAME_IS_i420_rgb)
39 # include "i420_rgb_c.h"
41 #elif defined (MODULE_NAME_IS_i420_rgb_mmx)
42 # include "../mmx/i420_rgb_mmx.h"
43 # define VLC_TARGET VLC_MMX
44 #elif defined (MODULE_NAME_IS_i420_rgb_sse2)
45 # include "../sse2/i420_rgb_sse2.h"
46 # define VLC_TARGET VLC_SSE
49 static void SetOffset( int, int, int, int, bool *,
50 unsigned int *, int * );
52 #if defined (MODULE_NAME_IS_i420_rgb)
53 /*****************************************************************************
54 * I420_RGB16_dither: color YUV 4:2:0 to RGB 16 bpp with dithering
55 *****************************************************************************
56 * Horizontal alignment needed:
57 * - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
58 * - output: 1 pixel (2 bytes), margins allowed
59 * Vertical alignment needed:
60 * - input: 2 lines (2 Y lines, 1 U/V line)
62 *****************************************************************************/
/*
 * I420_RGB16_dither: convert the planar YUV 4:2:0 picture p_src into 16-bit
 * RGB pixels stored in p_dest, using four 4-entry dithering tables.
 *
 * p_filter provides the input and output dimensions (fmt_in / fmt_out), the
 * output red shift (i_rrshift) and, through p_sys, the RGB lookup table
 * (p_rgb16), the scratch conversion buffer (p_buffer) and the offset array
 * (p_offset) handed to SetOffset().
 *
 * NOTE(review): p_dest, p_buffer, i_right_margin and i_rewind are used below
 * but their declarations are not visible in this listing; confirm against the
 * complete file.
 */
63 void I420_RGB16_dither( filter_t
*p_filter
, picture_t
*p_src
,
66 /* We got this one from the old arguments */
67 uint16_t *p_pic
= (uint16_t*)p_dest
->p
->p_pixels
;
68 uint8_t *p_y
= p_src
->Y_PIXELS
;
69 uint8_t *p_u
= p_src
->U_PIXELS
;
70 uint8_t *p_v
= p_src
->V_PIXELS
;
72 bool b_hscale
; /* horizontal scaling type */
73 unsigned int i_vscale
; /* vertical scaling type */
74 unsigned int i_x
, i_y
; /* horizontal and vertical indexes */
75 unsigned int i_real_y
; /* y % 4 */
79 int i_scale_count
; /* scale modulo counter */
80 int i_chroma_width
= p_filter
->fmt_in
.video
.i_width
/ 2; /* chroma width */
81 uint16_t * p_pic_start
; /* beginning of the current line for copy */
82 int i_uval
, i_vval
; /* U and V samples */
83 int i_red
, i_green
, i_blue
; /* U and V modified samples */
84 uint16_t * p_yuv
= p_filter
->p_sys
->p_rgb16
;
85 uint16_t * p_ybase
; /* Y dependent conversion table */
87 /* Conversion buffer pointer */
88 uint16_t * p_buffer_start
= (uint16_t*)p_filter
->p_sys
->p_buffer
;
91 /* Offset array pointer */
92 int * p_offset_start
= p_filter
->p_sys
->p_offset
;
/* Source line padding: full pitch minus visible pitch, for the luma plane
 * (index 0) and for the first chroma plane (index 1) */
95 const int i_source_margin
= p_src
->p
[0].i_pitch
96 - p_src
->p
[0].i_visible_pitch
;
97 const int i_source_margin_c
= p_src
->p
[1].i_pitch
98 - p_src
->p
[1].i_visible_pitch
;
100 /* The dithering matrices */
101 int dither10
[4] = { 0x0, 0x8, 0x2, 0xa };
102 int dither11
[4] = { 0xc, 0x4, 0xe, 0x6 };
103 int dither12
[4] = { 0x3, 0xb, 0x1, 0x9 };
104 int dither13
[4] = { 0xf, 0x7, 0xd, 0x5 };
/* Pre-shift every dither entry left by (SHIFT - 4 + output i_rrshift) */
106 for(i_x
= 0; i_x
< 4; i_x
++)
108 dither10
[i_x
] = dither10
[i_x
] << (SHIFT
- 4 + p_filter
->fmt_out
.video
.i_rrshift
);
109 dither11
[i_x
] = dither11
[i_x
] << (SHIFT
- 4 + p_filter
->fmt_out
.video
.i_rrshift
);
110 dither12
[i_x
] = dither12
[i_x
] << (SHIFT
- 4 + p_filter
->fmt_out
.video
.i_rrshift
);
111 dither13
[i_x
] = dither13
[i_x
] << (SHIFT
- 4 + p_filter
->fmt_out
.video
.i_rrshift
);
/* Destination line padding: pitch minus visible pitch */
114 i_right_margin
= p_dest
->p
->i_pitch
- p_dest
->p
->i_visible_pitch
;
/* (-width) & 7 is the amount needed to round the width up to a multiple
 * of 8; it is zero when the width is already a multiple of 8 */
115 i_rewind
= (-p_filter
->fmt_in
.video
.i_width
) & 7;
117 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
118 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
119 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
/* SetOffset() receives the input and output dimensions together with
 * pointers to b_hscale, i_vscale and the offset array */
120 SetOffset( p_filter
->fmt_in
.video
.i_width
,
121 p_filter
->fmt_in
.video
.i_height
,
122 p_filter
->fmt_out
.video
.i_width
,
123 p_filter
->fmt_out
.video
.i_height
,
124 &b_hscale
, &i_vscale
, p_offset_start
);
/* The scale counter starts from the output height when i_vscale == 1 and
 * from the input height otherwise */
129 i_scale_count
= ( i_vscale
== 1 ) ?
130 p_filter
->fmt_out
.video
.i_height
:
131 p_filter
->fmt_in
.video
.i_height
;
/* One pass per input line */
132 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
/* Line index modulo 4; presumably consumed by the CONVERT_*_DITHER
 * macros, which are defined outside this listing */
134 i_real_y
= i_y
& 0x3;
/* Convert into the scratch buffer when horizontal scaling is requested,
 * otherwise directly into the destination picture */
136 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
/* Main loop: runs width / 8 times, each pass issuing four
 * CONVERT_YUV_PIXEL_DITHER / CONVERT_Y_PIXEL_DITHER pairs */
138 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 8; i_x
--; )
140 int *p_dither
= dither10
;
141 CONVERT_YUV_PIXEL_DITHER(2);
143 CONVERT_Y_PIXEL_DITHER(2);
145 CONVERT_YUV_PIXEL_DITHER(2);
147 CONVERT_Y_PIXEL_DITHER(2);
149 CONVERT_YUV_PIXEL_DITHER(2);
151 CONVERT_Y_PIXEL_DITHER(2);
153 CONVERT_YUV_PIXEL_DITHER(2);
155 CONVERT_Y_PIXEL_DITHER(2);
158 /* Here we do some unaligned reads and duplicate conversions, but
159 * at least we have all the pixels */
162 int *p_dither
= dither10
;
/* Step the chroma pointers back by half the rewind amount and the output
 * pointer by the full amount before converting one more group */
164 p_u
-= i_rewind
>> 1;
165 p_v
-= i_rewind
>> 1;
166 p_buffer
-= i_rewind
;
167 CONVERT_YUV_PIXEL_DITHER(2);
169 CONVERT_Y_PIXEL_DITHER(2);
171 CONVERT_YUV_PIXEL_DITHER(2);
173 CONVERT_Y_PIXEL_DITHER(2);
175 CONVERT_YUV_PIXEL_DITHER(2);
177 CONVERT_Y_PIXEL_DITHER(2);
179 CONVERT_YUV_PIXEL_DITHER(2);
181 CONVERT_Y_PIXEL_DITHER(2);
/* SCALE_HEIGHT is a macro defined outside this listing */
184 SCALE_HEIGHT( 420, 2 );
/* Skip the padding at the end of the source lines */
186 p_y
+= i_source_margin
;
189 p_u
+= i_source_margin_c
;
190 p_v
+= i_source_margin_c
;
196 /*****************************************************************************
197 * I420_RGB16: color YUV 4:2:0 to RGB 16 bpp
198 *****************************************************************************
199 * Horizontal alignment needed:
200 * - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
201 * - output: 1 pixel (2 bytes), margins allowed
202 * Vertical alignment needed:
203 * - input: 2 lines (2 Y lines, 1 U/V line)
205 *****************************************************************************/
207 #if defined (MODULE_NAME_IS_i420_rgb)
/*
 * I420_RGB16: convert the planar YUV 4:2:0 picture p_src into 16-bit RGB
 * pixels stored in p_dest, without dithering.
 *
 * p_filter provides the input and output dimensions (fmt_in / fmt_out) and,
 * through p_sys, the RGB lookup table (p_rgb16), the scratch conversion
 * buffer (p_buffer) and the offset array (p_offset) handed to SetOffset().
 *
 * NOTE(review): p_buffer, i_right_margin and i_rewind are used below but
 * their declarations are not visible in this listing; confirm against the
 * complete file.
 */
209 void I420_RGB16( filter_t
*p_filter
, picture_t
*p_src
, picture_t
*p_dest
)
211 /* We got this one from the old arguments */
212 uint16_t *p_pic
= (uint16_t*)p_dest
->p
->p_pixels
;
213 uint8_t *p_y
= p_src
->Y_PIXELS
;
214 uint8_t *p_u
= p_src
->U_PIXELS
;
215 uint8_t *p_v
= p_src
->V_PIXELS
;
217 bool b_hscale
; /* horizontal scaling type */
218 unsigned int i_vscale
; /* vertical scaling type */
219 unsigned int i_x
, i_y
; /* horizontal and vertical indexes */
223 int i_scale_count
; /* scale modulo counter */
224 int i_chroma_width
= p_filter
->fmt_in
.video
.i_width
/ 2; /* chroma width */
225 uint16_t * p_pic_start
; /* beginning of the current line for copy */
226 int i_uval
, i_vval
; /* U and V samples */
227 int i_red
, i_green
, i_blue
; /* U and V modified samples */
228 uint16_t * p_yuv
= p_filter
->p_sys
->p_rgb16
;
229 uint16_t * p_ybase
; /* Y dependent conversion table */
231 /* Conversion buffer pointer */
232 uint16_t * p_buffer_start
= (uint16_t*)p_filter
->p_sys
->p_buffer
;
235 /* Offset array pointer */
236 int * p_offset_start
= p_filter
->p_sys
->p_offset
;
/* Source line padding: full pitch minus visible pitch, for the luma plane
 * (index 0) and for the first chroma plane (index 1) */
239 const int i_source_margin
= p_src
->p
[0].i_pitch
240 - p_src
->p
[0].i_visible_pitch
;
241 const int i_source_margin_c
= p_src
->p
[1].i_pitch
242 - p_src
->p
[1].i_visible_pitch
;
/* Destination line padding: pitch minus visible pitch */
244 i_right_margin
= p_dest
->p
->i_pitch
- p_dest
->p
->i_visible_pitch
;
/* (-width) & 7 is the amount needed to round the width up to a multiple
 * of 8; it is zero when the width is already a multiple of 8 */
245 i_rewind
= (-p_filter
->fmt_in
.video
.i_width
) & 7;
247 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
248 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
249 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
/* SetOffset() receives the input and output dimensions together with
 * pointers to b_hscale, i_vscale and the offset array */
250 SetOffset( p_filter
->fmt_in
.video
.i_width
,
251 p_filter
->fmt_in
.video
.i_height
,
252 p_filter
->fmt_out
.video
.i_width
,
253 p_filter
->fmt_out
.video
.i_height
,
254 &b_hscale
, &i_vscale
, p_offset_start
);
/* The scale counter starts from the output height when i_vscale == 1 and
 * from the input height otherwise */
259 i_scale_count
= ( i_vscale
== 1 ) ?
260 p_filter
->fmt_out
.video
.i_height
:
261 p_filter
->fmt_in
.video
.i_height
;
/* One pass per input line */
262 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
/* Convert into the scratch buffer when horizontal scaling is requested,
 * otherwise directly into the destination picture */
265 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
/* Main loop: runs width / 8 times, each pass issuing four
 * CONVERT_YUV_PIXEL / CONVERT_Y_PIXEL pairs */
267 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 8; i_x
--; )
269 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
270 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
271 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
272 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
275 /* Here we do some unaligned reads and duplicate conversions, but
276 * at least we have all the pixels */
/* Step the chroma pointers back by half the rewind amount and the output
 * pointer by the full amount before converting one more group */
280 p_u
-= i_rewind
>> 1;
281 p_v
-= i_rewind
>> 1;
282 p_buffer
-= i_rewind
;
284 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
285 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
286 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
287 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
/* SCALE_HEIGHT is a macro defined outside this listing */
290 SCALE_HEIGHT( 420, 2 );
/* Skip the padding at the end of the source lines */
292 p_y
+= i_source_margin
;
295 p_u
+= i_source_margin_c
;
296 p_v
+= i_source_margin_c
;
301 #else // ! defined (MODULE_NAME_IS_i420_rgb)
/*
 * I420_R5G5B5: convert the planar YUV 4:2:0 picture p_src into 16-bit pixels
 * stored in p_dest; this is the variant compiled when the module is not the
 * plain C one (SSE2 or MMX build).
 *
 * p_filter provides the input and output dimensions (fmt_in / fmt_out) and,
 * through p_sys, the scratch conversion buffer (p_buffer) and the offset
 * array (p_offset) handed to SetOffset().
 *
 * NOTE(review): p_buffer, i_right_margin and i_rewind are used below but
 * their declarations are not visible in this listing, and most of the SIMD
 * loop bodies are missing as well; confirm against the complete file.
 */
304 void I420_R5G5B5( filter_t
*p_filter
, picture_t
*p_src
, picture_t
*p_dest
)
306 /* We got this one from the old arguments */
307 uint16_t *p_pic
= (uint16_t*)p_dest
->p
->p_pixels
;
308 uint8_t *p_y
= p_src
->Y_PIXELS
;
309 uint8_t *p_u
= p_src
->U_PIXELS
;
310 uint8_t *p_v
= p_src
->V_PIXELS
;
312 bool b_hscale
; /* horizontal scaling type */
313 unsigned int i_vscale
; /* vertical scaling type */
314 unsigned int i_x
, i_y
; /* horizontal and vertical indexes */
318 int i_scale_count
; /* scale modulo counter */
319 int i_chroma_width
= p_filter
->fmt_in
.video
.i_width
/ 2; /* chroma width */
320 uint16_t * p_pic_start
; /* beginning of the current line for copy */
322 /* Conversion buffer pointer */
323 uint16_t * p_buffer_start
= (uint16_t*)p_filter
->p_sys
->p_buffer
;
326 /* Offset array pointer */
327 int * p_offset_start
= p_filter
->p_sys
->p_offset
;
/* Source line padding: full pitch minus visible pitch, for the luma plane
 * (index 0) and for the first chroma plane (index 1) */
330 const int i_source_margin
= p_src
->p
[0].i_pitch
331 - p_src
->p
[0].i_visible_pitch
;
332 const int i_source_margin_c
= p_src
->p
[1].i_pitch
333 - p_src
->p
[1].i_visible_pitch
;
/* Destination line padding: pitch minus visible pitch */
335 i_right_margin
= p_dest
->p
->i_pitch
- p_dest
->p
->i_visible_pitch
;
337 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
338 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
339 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
/* SetOffset() receives the input and output dimensions together with
 * pointers to b_hscale, i_vscale and the offset array */
340 SetOffset( p_filter
->fmt_in
.video
.i_width
,
341 p_filter
->fmt_in
.video
.i_height
,
342 p_filter
->fmt_out
.video
.i_width
,
343 p_filter
->fmt_out
.video
.i_height
,
344 &b_hscale
, &i_vscale
, p_offset_start
);
/* The scale counter starts from the output height when i_vscale == 1 and
 * from the input height otherwise */
350 i_scale_count
= ( i_vscale
== 1 ) ?
351 p_filter
->fmt_out
.video
.i_height
:
352 p_filter
->fmt_in
.video
.i_height
;
/* SSE2 build: the inner loops run width / 16 times and the rewind amount is
 * computed modulo 16 */
354 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
356 i_rewind
= (-p_filter
->fmt_in
.video
.i_width
) & 15;
359 ** SSE2 128 bits fetch/store instructions are faster
360 ** if memory access is 16 bytes aligned
363 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
/* Alignment test: the low four bits of the OR-ed operands must all be zero
 * for the aligned path to be taken */
364 if( 0 == (15 & (p_src
->p
[Y_PLANE
].i_pitch
|
367 ((intptr_t)p_buffer
))) )
369 /* use faster SSE2 aligned fetch and store */
370 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
374 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/16; i_x
--; )
380 SSE2_UNPACK_15_ALIGNED
387 /* Here we do some unaligned reads and duplicate conversions, but
388 * at least we have all the pixels */
/* Step the chroma pointers back by half the rewind amount and the output
 * pointer by the full amount; the trailing group uses the unaligned macros */
392 p_u
-= i_rewind
>> 1;
393 p_v
-= i_rewind
>> 1;
394 p_buffer
-= i_rewind
;
397 SSE2_INIT_16_UNALIGNED
400 SSE2_UNPACK_15_UNALIGNED
/* SCALE_HEIGHT is a macro defined outside this listing */
407 SCALE_HEIGHT( 420, 2 );
/* Skip the padding at the end of the source lines */
409 p_y
+= i_source_margin
;
412 p_u
+= i_source_margin_c
;
413 p_v
+= i_source_margin_c
;
/* Re-select the conversion target for the next line */
415 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
420 /* use slower SSE2 unaligned fetch and store */
421 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
424 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
426 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/16; i_x
--; )
429 SSE2_INIT_16_UNALIGNED
432 SSE2_UNPACK_15_UNALIGNED
439 /* Here we do some unaligned reads and duplicate conversions, but
440 * at least we have all the pixels */
444 p_u
-= i_rewind
>> 1;
445 p_v
-= i_rewind
>> 1;
446 p_buffer
-= i_rewind
;
449 SSE2_INIT_16_UNALIGNED
452 SSE2_UNPACK_15_UNALIGNED
459 SCALE_HEIGHT( 420, 2 );
461 p_y
+= i_source_margin
;
464 p_u
+= i_source_margin_c
;
465 p_v
+= i_source_margin_c
;
467 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
471 /* make sure all SSE2 stores are visible thereafter */
474 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
/* MMX build: the inner loop runs width / 8 times and the rewind amount is
 * computed modulo 8.
 * NOTE(review): the loop bodies of this branch are not visible here. */
476 i_rewind
= (-p_filter
->fmt_in
.video
.i_width
) & 7;
478 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
481 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
483 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 8; i_x
--; )
497 /* Here we do some unaligned reads and duplicate conversions, but
498 * at least we have all the pixels */
502 p_u
-= i_rewind
>> 1;
503 p_v
-= i_rewind
>> 1;
504 p_buffer
-= i_rewind
;
518 SCALE_HEIGHT( 420, 2 );
520 p_y
+= i_source_margin
;
523 p_u
+= i_source_margin_c
;
524 p_v
+= i_source_margin_c
;
527 /* re-enable FPU registers */
/*
 * I420_R5G6B5: convert the planar YUV 4:2:0 picture p_src into 16-bit pixels
 * stored in p_dest; this is the variant compiled when the module is not the
 * plain C one (SSE2 or MMX build).
 *
 * p_filter provides the input and output dimensions (fmt_in / fmt_out) and,
 * through p_sys, the scratch conversion buffer (p_buffer) and the offset
 * array (p_offset) handed to SetOffset().
 *
 * NOTE(review): p_buffer, i_right_margin and i_rewind are used below but
 * their declarations are not visible in this listing, and most of the SIMD
 * loop bodies are missing as well; confirm against the complete file.
 */
534 void I420_R5G6B5( filter_t
*p_filter
, picture_t
*p_src
, picture_t
*p_dest
)
536 /* We got this one from the old arguments */
537 uint16_t *p_pic
= (uint16_t*)p_dest
->p
->p_pixels
;
538 uint8_t *p_y
= p_src
->Y_PIXELS
;
539 uint8_t *p_u
= p_src
->U_PIXELS
;
540 uint8_t *p_v
= p_src
->V_PIXELS
;
542 bool b_hscale
; /* horizontal scaling type */
543 unsigned int i_vscale
; /* vertical scaling type */
544 unsigned int i_x
, i_y
; /* horizontal and vertical indexes */
548 int i_scale_count
; /* scale modulo counter */
549 int i_chroma_width
= p_filter
->fmt_in
.video
.i_width
/ 2; /* chroma width */
550 uint16_t * p_pic_start
; /* beginning of the current line for copy */
552 /* Conversion buffer pointer */
553 uint16_t * p_buffer_start
= (uint16_t*)p_filter
->p_sys
->p_buffer
;
556 /* Offset array pointer */
557 int * p_offset_start
= p_filter
->p_sys
->p_offset
;
/* Source line padding: full pitch minus visible pitch, for the luma plane
 * (index 0) and for the first chroma plane (index 1) */
560 const int i_source_margin
= p_src
->p
[0].i_pitch
561 - p_src
->p
[0].i_visible_pitch
;
562 const int i_source_margin_c
= p_src
->p
[1].i_pitch
563 - p_src
->p
[1].i_visible_pitch
;
/* Destination line padding: pitch minus visible pitch */
565 i_right_margin
= p_dest
->p
->i_pitch
- p_dest
->p
->i_visible_pitch
;
567 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
568 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
569 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
/* SetOffset() receives the input and output dimensions together with
 * pointers to b_hscale, i_vscale and the offset array */
570 SetOffset( p_filter
->fmt_in
.video
.i_width
,
571 p_filter
->fmt_in
.video
.i_height
,
572 p_filter
->fmt_out
.video
.i_width
,
573 p_filter
->fmt_out
.video
.i_height
,
574 &b_hscale
, &i_vscale
, p_offset_start
);
/* The scale counter starts from the output height when i_vscale == 1 and
 * from the input height otherwise */
580 i_scale_count
= ( i_vscale
== 1 ) ?
581 p_filter
->fmt_out
.video
.i_height
:
582 p_filter
->fmt_in
.video
.i_height
;
/* SSE2 build: the inner loops run width / 16 times and the rewind amount is
 * computed modulo 16 */
584 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
586 i_rewind
= (-p_filter
->fmt_in
.video
.i_width
) & 15;
589 ** SSE2 128 bits fetch/store instructions are faster
590 ** if memory access is 16 bytes aligned
593 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
/* Alignment test: the low four bits of the OR-ed operands must all be zero
 * for the aligned path to be taken */
594 if( 0 == (15 & (p_src
->p
[Y_PLANE
].i_pitch
|
597 ((intptr_t)p_buffer
))) )
599 /* use faster SSE2 aligned fetch and store */
600 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
604 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/16; i_x
--; )
610 SSE2_UNPACK_16_ALIGNED
617 /* Here we do some unaligned reads and duplicate conversions, but
618 * at least we have all the pixels */
/* Step the chroma pointers back by half the rewind amount and the output
 * pointer by the full amount; the trailing group uses the unaligned macros */
622 p_u
-= i_rewind
>> 1;
623 p_v
-= i_rewind
>> 1;
624 p_buffer
-= i_rewind
;
627 SSE2_INIT_16_UNALIGNED
630 SSE2_UNPACK_16_UNALIGNED
/* SCALE_HEIGHT is a macro defined outside this listing */
637 SCALE_HEIGHT( 420, 2 );
/* Skip the padding at the end of the source lines */
639 p_y
+= i_source_margin
;
642 p_u
+= i_source_margin_c
;
643 p_v
+= i_source_margin_c
;
/* Re-select the conversion target for the next line */
645 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
650 /* use slower SSE2 unaligned fetch and store */
651 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
654 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
656 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/16; i_x
--; )
659 SSE2_INIT_16_UNALIGNED
662 SSE2_UNPACK_16_UNALIGNED
669 /* Here we do some unaligned reads and duplicate conversions, but
670 * at least we have all the pixels */
674 p_u
-= i_rewind
>> 1;
675 p_v
-= i_rewind
>> 1;
676 p_buffer
-= i_rewind
;
679 SSE2_INIT_16_UNALIGNED
682 SSE2_UNPACK_16_UNALIGNED
689 SCALE_HEIGHT( 420, 2 );
691 p_y
+= i_source_margin
;
694 p_u
+= i_source_margin_c
;
695 p_v
+= i_source_margin_c
;
697 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
701 /* make sure all SSE2 stores are visible thereafter */
704 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
/* MMX build: the inner loop runs width / 8 times and the rewind amount is
 * computed modulo 8.
 * NOTE(review): the loop bodies of this branch are not visible here. */
706 i_rewind
= (-p_filter
->fmt_in
.video
.i_width
) & 7;
708 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
711 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
713 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 8; i_x
--; )
727 /* Here we do some unaligned reads and duplicate conversions, but
728 * at least we have all the pixels */
732 p_u
-= i_rewind
>> 1;
733 p_v
-= i_rewind
>> 1;
734 p_buffer
-= i_rewind
;
748 SCALE_HEIGHT( 420, 2 );
750 p_y
+= i_source_margin
;
753 p_u
+= i_source_margin_c
;
754 p_v
+= i_source_margin_c
;
757 /* re-enable FPU registers */
765 /*****************************************************************************
766 * I420_RGB32: color YUV 4:2:0 to RGB 32 bpp
767 *****************************************************************************
768 * Horizontal alignment needed:
769 * - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
770 * - output: 1 pixel (4 bytes), margins allowed
771 * Vertical alignment needed:
772 * - input: 2 lines (2 Y lines, 1 U/V line)
774 *****************************************************************************/
776 #if defined (MODULE_NAME_IS_i420_rgb)
/*
 * I420_RGB32: convert the planar YUV 4:2:0 picture p_src into 32-bit RGB
 * pixels stored in p_dest (plain C build).
 *
 * p_filter provides the input and output dimensions (fmt_in / fmt_out) and,
 * through p_sys, the RGB lookup table (p_rgb32), the scratch conversion
 * buffer (p_buffer) and the offset array (p_offset) handed to SetOffset().
 *
 * NOTE(review): p_buffer, i_right_margin and i_rewind are used below but
 * their declarations are not visible in this listing; confirm against the
 * complete file.
 */
778 void I420_RGB32( filter_t
*p_filter
, picture_t
*p_src
, picture_t
*p_dest
)
780 /* We got this one from the old arguments */
781 uint32_t *p_pic
= (uint32_t*)p_dest
->p
->p_pixels
;
782 uint8_t *p_y
= p_src
->Y_PIXELS
;
783 uint8_t *p_u
= p_src
->U_PIXELS
;
784 uint8_t *p_v
= p_src
->V_PIXELS
;
786 bool b_hscale
; /* horizontal scaling type */
787 unsigned int i_vscale
; /* vertical scaling type */
788 unsigned int i_x
, i_y
; /* horizontal and vertical indexes */
792 int i_scale_count
; /* scale modulo counter */
793 int i_chroma_width
= p_filter
->fmt_in
.video
.i_width
/ 2; /* chroma width */
794 uint32_t * p_pic_start
; /* beginning of the current line for copy */
795 int i_uval
, i_vval
; /* U and V samples */
796 int i_red
, i_green
, i_blue
; /* U and V modified samples */
797 uint32_t * p_yuv
= p_filter
->p_sys
->p_rgb32
;
798 uint32_t * p_ybase
; /* Y dependent conversion table */
800 /* Conversion buffer pointer */
801 uint32_t * p_buffer_start
= (uint32_t*)p_filter
->p_sys
->p_buffer
;
804 /* Offset array pointer */
805 int * p_offset_start
= p_filter
->p_sys
->p_offset
;
/* Source line padding: full pitch minus visible pitch, for the luma plane
 * (index 0) and for the first chroma plane (index 1) */
808 const int i_source_margin
= p_src
->p
[0].i_pitch
809 - p_src
->p
[0].i_visible_pitch
;
810 const int i_source_margin_c
= p_src
->p
[1].i_pitch
811 - p_src
->p
[1].i_visible_pitch
;
/* Destination line padding: pitch minus visible pitch */
813 i_right_margin
= p_dest
->p
->i_pitch
- p_dest
->p
->i_visible_pitch
;
/* (-width) & 7 is the amount needed to round the width up to a multiple
 * of 8; it is zero when the width is already a multiple of 8 */
814 i_rewind
= (-p_filter
->fmt_in
.video
.i_width
) & 7;
816 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
817 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
818 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
/* SetOffset() receives the input and output dimensions together with
 * pointers to b_hscale, i_vscale and the offset array */
819 SetOffset( p_filter
->fmt_in
.video
.i_width
,
820 p_filter
->fmt_in
.video
.i_height
,
821 p_filter
->fmt_out
.video
.i_width
,
822 p_filter
->fmt_out
.video
.i_height
,
823 &b_hscale
, &i_vscale
, p_offset_start
);
/* The scale counter starts from the output height when i_vscale == 1 and
 * from the input height otherwise */
828 i_scale_count
= ( i_vscale
== 1 ) ?
829 p_filter
->fmt_out
.video
.i_height
:
830 p_filter
->fmt_in
.video
.i_height
;
/* One pass per input line */
831 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
/* Convert into the scratch buffer when horizontal scaling is requested,
 * otherwise directly into the destination picture */
834 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
/* Main loop: runs width / 8 times, each pass issuing four
 * CONVERT_YUV_PIXEL / CONVERT_Y_PIXEL pairs */
836 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 8; i_x
--; )
838 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
839 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
840 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
841 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
844 /* Here we do some unaligned reads and duplicate conversions, but
845 * at least we have all the pixels */
/* Step the chroma pointers back by half the rewind amount and the output
 * pointer by the full amount before converting one more group */
849 p_u
-= i_rewind
>> 1;
850 p_v
-= i_rewind
>> 1;
851 p_buffer
-= i_rewind
;
852 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
853 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
854 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
855 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
/* SCALE_HEIGHT is a macro defined outside this listing */
858 SCALE_HEIGHT( 420, 4 );
/* Skip the padding at the end of the source lines */
860 p_y
+= i_source_margin
;
863 p_u
+= i_source_margin_c
;
864 p_v
+= i_source_margin_c
;
869 #else // defined (MODULE_NAME_IS_i420_rgb_mmx) || defined (MODULE_NAME_IS_i420_rgb_sse2)
/*
 * I420_A8R8G8B8: convert the planar YUV 4:2:0 picture p_src into 32-bit
 * pixels stored in p_dest; this is the variant compiled when the module is
 * not the plain C one (SSE2 or MMX build). The SSE2 paths use the
 * SSE2_UNPACK_32_ARGB_* macros.
 *
 * p_filter provides the input and output dimensions (fmt_in / fmt_out) and,
 * through p_sys, the scratch conversion buffer (p_buffer) and the offset
 * array (p_offset) handed to SetOffset().
 *
 * NOTE(review): the p_dest parameter and the declarations of p_buffer,
 * i_right_margin and i_rewind are not visible in this listing, and most of
 * the SIMD loop bodies are missing as well; confirm against the complete
 * file.
 */
872 void I420_A8R8G8B8( filter_t
*p_filter
, picture_t
*p_src
,
875 /* We got this one from the old arguments */
876 uint32_t *p_pic
= (uint32_t*)p_dest
->p
->p_pixels
;
877 uint8_t *p_y
= p_src
->Y_PIXELS
;
878 uint8_t *p_u
= p_src
->U_PIXELS
;
879 uint8_t *p_v
= p_src
->V_PIXELS
;
881 bool b_hscale
; /* horizontal scaling type */
882 unsigned int i_vscale
; /* vertical scaling type */
883 unsigned int i_x
, i_y
; /* horizontal and vertical indexes */
887 int i_scale_count
; /* scale modulo counter */
888 int i_chroma_width
= p_filter
->fmt_in
.video
.i_width
/ 2; /* chroma width */
889 uint32_t * p_pic_start
; /* beginning of the current line for copy */
890 /* Conversion buffer pointer */
891 uint32_t * p_buffer_start
= (uint32_t*)p_filter
->p_sys
->p_buffer
;
894 /* Offset array pointer */
895 int * p_offset_start
= p_filter
->p_sys
->p_offset
;
/* Source line padding: full pitch minus visible pitch, for the luma plane
 * (index 0) and for the first chroma plane (index 1) */
898 const int i_source_margin
= p_src
->p
[0].i_pitch
899 - p_src
->p
[0].i_visible_pitch
;
900 const int i_source_margin_c
= p_src
->p
[1].i_pitch
901 - p_src
->p
[1].i_visible_pitch
;
/* Destination line padding: pitch minus visible pitch */
903 i_right_margin
= p_dest
->p
->i_pitch
- p_dest
->p
->i_visible_pitch
;
905 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
906 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
907 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
/* SetOffset() receives the input and output dimensions together with
 * pointers to b_hscale, i_vscale and the offset array */
908 SetOffset( p_filter
->fmt_in
.video
.i_width
,
909 p_filter
->fmt_in
.video
.i_height
,
910 p_filter
->fmt_out
.video
.i_width
,
911 p_filter
->fmt_out
.video
.i_height
,
912 &b_hscale
, &i_vscale
, p_offset_start
);
/* The scale counter starts from the output height when i_vscale == 1 and
 * from the input height otherwise */
917 i_scale_count
= ( i_vscale
== 1 ) ?
918 p_filter
->fmt_out
.video
.i_height
:
919 p_filter
->fmt_in
.video
.i_height
;
/* SSE2 build: the inner loops run width / 16 times and the rewind amount is
 * computed modulo 16 */
921 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
923 i_rewind
= (-p_filter
->fmt_in
.video
.i_width
) & 15;
926 ** SSE2 128 bits fetch/store instructions are faster
927 ** if memory access is 16 bytes aligned
930 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
/* Alignment test: the low four bits of the OR-ed operands must all be zero
 * for the aligned path to be taken */
931 if( 0 == (15 & (p_src
->p
[Y_PLANE
].i_pitch
|
934 ((intptr_t)p_buffer
))) )
936 /* use faster SSE2 aligned fetch and store */
937 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
941 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 16; i_x
--; )
947 SSE2_UNPACK_32_ARGB_ALIGNED
955 /* Here we do some unaligned reads and duplicate conversions, but
956 * at least we have all the pixels */
/* Step the chroma pointers back by half the rewind amount and the output
 * pointer by the full amount; the trailing group uses the unaligned macros */
960 p_u
-= i_rewind
>> 1;
961 p_v
-= i_rewind
>> 1;
962 p_buffer
-= i_rewind
;
964 SSE2_INIT_32_UNALIGNED
967 SSE2_UNPACK_32_ARGB_UNALIGNED
/* SCALE_HEIGHT is a macro defined outside this listing */
974 SCALE_HEIGHT( 420, 4 );
/* Skip the padding at the end of the source lines */
976 p_y
+= i_source_margin
;
979 p_u
+= i_source_margin_c
;
980 p_v
+= i_source_margin_c
;
/* Re-select the conversion target for the next line */
982 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
987 /* use slower SSE2 unaligned fetch and store */
988 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
991 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
993 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 16; i_x
--; )
996 SSE2_INIT_32_UNALIGNED
999 SSE2_UNPACK_32_ARGB_UNALIGNED
1007 /* Here we do some unaligned reads and duplicate conversions, but
1008 * at least we have all the pixels */
1012 p_u
-= i_rewind
>> 1;
1013 p_v
-= i_rewind
>> 1;
1014 p_buffer
-= i_rewind
;
1016 SSE2_INIT_32_UNALIGNED
1019 SSE2_UNPACK_32_ARGB_UNALIGNED
1026 SCALE_HEIGHT( 420, 4 );
1028 p_y
+= i_source_margin
;
1031 p_u
+= i_source_margin_c
;
1032 p_v
+= i_source_margin_c
;
1034 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1038 /* make sure all SSE2 stores are visible thereafter */
1041 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
/* MMX build: the inner loop runs width / 8 times and the rewind amount is
 * computed modulo 8.
 * NOTE(review): the loop bodies of this branch are not visible here. */
1043 i_rewind
= (-p_filter
->fmt_in
.video
.i_width
) & 7;
1045 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
/* Remember where the current destination line starts */
1047 p_pic_start
= p_pic
;
1048 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1050 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 8; i_x
--; )
1064 /* Here we do some unaligned reads and duplicate conversions, but
1065 * at least we have all the pixels */
1069 p_u
-= i_rewind
>> 1;
1070 p_v
-= i_rewind
>> 1;
1071 p_buffer
-= i_rewind
;
1084 SCALE_HEIGHT( 420, 4 );
1086 p_y
+= i_source_margin
;
1089 p_u
+= i_source_margin_c
;
1090 p_v
+= i_source_margin_c
;
1094 /* re-enable FPU registers */
/*
 * I420_R8G8B8A8: convert the planar YUV 4:2:0 picture p_src into 32-bit
 * pixels stored in p_dest; this is the variant compiled when the module is
 * not the plain C one (SSE2 or MMX build). The SSE2 paths use the
 * SSE2_UNPACK_32_RGBA_* macros.
 *
 * p_filter provides the input and output dimensions (fmt_in / fmt_out) and,
 * through p_sys, the scratch conversion buffer (p_buffer) and the offset
 * array (p_offset) handed to SetOffset().
 *
 * NOTE(review): the declarations of i_right_margin and i_rewind are not
 * visible in this listing, and most of the SIMD loop bodies are missing as
 * well; confirm against the complete file.
 */
1101 void I420_R8G8B8A8( filter_t
*p_filter
, picture_t
*p_src
, picture_t
*p_dest
)
1103 /* We got this one from the old arguments */
1104 uint32_t *p_pic
= (uint32_t*)p_dest
->p
->p_pixels
;
1105 uint8_t *p_y
= p_src
->Y_PIXELS
;
1106 uint8_t *p_u
= p_src
->U_PIXELS
;
1107 uint8_t *p_v
= p_src
->V_PIXELS
;
1109 bool b_hscale
; /* horizontal scaling type */
1110 unsigned int i_vscale
; /* vertical scaling type */
1111 unsigned int i_x
, i_y
; /* horizontal and vertical indexes */
1115 int i_scale_count
; /* scale modulo counter */
1116 int i_chroma_width
= p_filter
->fmt_in
.video
.i_width
/ 2; /* chroma width */
1117 uint32_t * p_pic_start
; /* beginning of the current line for copy */
1118 /* Conversion buffer pointer */
1119 uint32_t * p_buffer_start
= (uint32_t*)p_filter
->p_sys
->p_buffer
;
/* Current write position: either inside the scratch buffer or inside the
 * destination picture, depending on b_hscale */
1120 uint32_t * p_buffer
;
1122 /* Offset array pointer */
1123 int * p_offset_start
= p_filter
->p_sys
->p_offset
;
/* Source line padding: full pitch minus visible pitch, for the luma plane
 * (index 0) and for the first chroma plane (index 1) */
1126 const int i_source_margin
= p_src
->p
[0].i_pitch
1127 - p_src
->p
[0].i_visible_pitch
;
1128 const int i_source_margin_c
= p_src
->p
[1].i_pitch
1129 - p_src
->p
[1].i_visible_pitch
;
/* Destination line padding: pitch minus visible pitch */
1131 i_right_margin
= p_dest
->p
->i_pitch
- p_dest
->p
->i_visible_pitch
;
1133 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
1134 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
1135 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
/* SetOffset() receives the input and output dimensions together with
 * pointers to b_hscale, i_vscale and the offset array */
1136 SetOffset( p_filter
->fmt_in
.video
.i_width
,
1137 p_filter
->fmt_in
.video
.i_height
,
1138 p_filter
->fmt_out
.video
.i_width
,
1139 p_filter
->fmt_out
.video
.i_height
,
1140 &b_hscale
, &i_vscale
, p_offset_start
);
1143 * Perform conversion
/* The scale counter starts from the output height when i_vscale == 1 and
 * from the input height otherwise */
1145 i_scale_count
= ( i_vscale
== 1 ) ?
1146 p_filter
->fmt_out
.video
.i_height
:
1147 p_filter
->fmt_in
.video
.i_height
;
/* SSE2 build: the inner loops run width / 16 times and the rewind amount is
 * computed modulo 16 */
1149 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
1151 i_rewind
= (-p_filter
->fmt_in
.video
.i_width
) & 15;
1154 ** SSE2 128 bits fetch/store instructions are faster
1155 ** if memory access is 16 bytes aligned
1158 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
/* Alignment test: the low four bits of the OR-ed operands must all be zero
 * for the aligned path to be taken */
1159 if( 0 == (15 & (p_src
->p
[Y_PLANE
].i_pitch
|
1162 ((intptr_t)p_buffer
))) )
1164 /* use faster SSE2 aligned fetch and store */
1165 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
/* Remember where the current destination line starts */
1167 p_pic_start
= p_pic
;
1169 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 16; i_x
--; )
1172 SSE2_INIT_32_ALIGNED
1175 SSE2_UNPACK_32_RGBA_ALIGNED
1183 /* Here we do some unaligned reads and duplicate conversions, but
1184 * at least we have all the pixels */
/* Step the chroma pointers back by half the rewind amount and the output
 * pointer by the full amount; the trailing group uses the unaligned macros */
1188 p_u
-= i_rewind
>> 1;
1189 p_v
-= i_rewind
>> 1;
1190 p_buffer
-= i_rewind
;
1192 SSE2_INIT_32_UNALIGNED
1195 SSE2_UNPACK_32_RGBA_UNALIGNED
/* SCALE_HEIGHT is a macro defined outside this listing */
1202 SCALE_HEIGHT( 420, 4 );
/* Skip the padding at the end of the source lines */
1204 p_y
+= i_source_margin
;
1207 p_u
+= i_source_margin_c
;
1208 p_v
+= i_source_margin_c
;
/* Re-select the conversion target for the next line */
1210 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1215 /* use slower SSE2 unaligned fetch and store */
1216 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
1218 p_pic_start
= p_pic
;
1219 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1221 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 16; i_x
--; )
1224 SSE2_INIT_32_UNALIGNED
1227 SSE2_UNPACK_32_RGBA_UNALIGNED
1235 /* Here we do some unaligned reads and duplicate conversions, but
1236 * at least we have all the pixels */
1240 p_u
-= i_rewind
>> 1;
1241 p_v
-= i_rewind
>> 1;
1242 p_buffer
-= i_rewind
;
1244 SSE2_INIT_32_UNALIGNED
1247 SSE2_UNPACK_32_RGBA_UNALIGNED
1254 SCALE_HEIGHT( 420, 4 );
1256 p_y
+= i_source_margin
;
1259 p_u
+= i_source_margin_c
;
1260 p_v
+= i_source_margin_c
;
1262 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1266 /* make sure all SSE2 stores are visible thereafter */
1269 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
/* MMX build: the inner loop runs width / 8 times and the rewind amount is
 * computed modulo 8.
 * NOTE(review): the loop bodies of this branch are not visible here. */
1271 i_rewind
= (-p_filter
->fmt_in
.video
.i_width
) & 7;
1273 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
1275 p_pic_start
= p_pic
;
1276 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1278 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 8; i_x
--; )
1292 /* Here we do some unaligned reads and duplicate conversions, but
1293 * at least we have all the pixels */
1297 p_u
-= i_rewind
>> 1;
1298 p_v
-= i_rewind
>> 1;
1299 p_buffer
-= i_rewind
;
1312 SCALE_HEIGHT( 420, 4 );
1314 p_y
+= i_source_margin
;
1317 p_u
+= i_source_margin_c
;
1318 p_v
+= i_source_margin_c
;
1322 /* re-enable FPU registers */
/*
 * I420_B8G8R8A8: convert the planar YUV 4:2:0 picture p_src into 32-bit
 * pixels stored in p_dest; this is the variant compiled when the module is
 * not the plain C one (SSE2 or MMX build). The SSE2 paths use the
 * SSE2_UNPACK_32_BGRA_* macros.
 *
 * p_filter provides the input and output dimensions (fmt_in / fmt_out) and,
 * through p_sys, the scratch conversion buffer (p_buffer) and the offset
 * array (p_offset) handed to SetOffset().
 *
 * NOTE(review): the declarations of i_right_margin and i_rewind are not
 * visible in this listing, and most of the SIMD loop bodies are missing as
 * well; confirm against the complete file.
 */
1329 void I420_B8G8R8A8( filter_t
*p_filter
, picture_t
*p_src
, picture_t
*p_dest
)
1331 /* We got this one from the old arguments */
1332 uint32_t *p_pic
= (uint32_t*)p_dest
->p
->p_pixels
;
1333 uint8_t *p_y
= p_src
->Y_PIXELS
;
1334 uint8_t *p_u
= p_src
->U_PIXELS
;
1335 uint8_t *p_v
= p_src
->V_PIXELS
;
1337 bool b_hscale
; /* horizontal scaling type */
1338 unsigned int i_vscale
; /* vertical scaling type */
1339 unsigned int i_x
, i_y
; /* horizontal and vertical indexes */
1343 int i_scale_count
; /* scale modulo counter */
1344 int i_chroma_width
= p_filter
->fmt_in
.video
.i_width
/ 2; /* chroma width */
1345 uint32_t * p_pic_start
; /* beginning of the current line for copy */
1346 /* Conversion buffer pointer */
1347 uint32_t * p_buffer_start
= (uint32_t*)p_filter
->p_sys
->p_buffer
;
/* Current write position: either inside the scratch buffer or inside the
 * destination picture, depending on b_hscale */
1348 uint32_t * p_buffer
;
1350 /* Offset array pointer */
1351 int * p_offset_start
= p_filter
->p_sys
->p_offset
;
/* Source line padding: full pitch minus visible pitch, for the luma plane
 * (index 0) and for the first chroma plane (index 1) */
1354 const int i_source_margin
= p_src
->p
[0].i_pitch
1355 - p_src
->p
[0].i_visible_pitch
;
1356 const int i_source_margin_c
= p_src
->p
[1].i_pitch
1357 - p_src
->p
[1].i_visible_pitch
;
/* Destination line padding: pitch minus visible pitch */
1359 i_right_margin
= p_dest
->p
->i_pitch
- p_dest
->p
->i_visible_pitch
;
1361 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
1362 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
1363 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
/* SetOffset() receives the input and output dimensions together with
 * pointers to b_hscale, i_vscale and the offset array */
1364 SetOffset( p_filter
->fmt_in
.video
.i_width
,
1365 p_filter
->fmt_in
.video
.i_height
,
1366 p_filter
->fmt_out
.video
.i_width
,
1367 p_filter
->fmt_out
.video
.i_height
,
1368 &b_hscale
, &i_vscale
, p_offset_start
);
1371 * Perform conversion
/* The scale counter starts from the output height when i_vscale == 1 and
 * from the input height otherwise */
1373 i_scale_count
= ( i_vscale
== 1 ) ?
1374 p_filter
->fmt_out
.video
.i_height
:
1375 p_filter
->fmt_in
.video
.i_height
;
/* SSE2 build: the inner loops run width / 16 times and the rewind amount is
 * computed modulo 16 */
1377 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
1379 i_rewind
= (-p_filter
->fmt_in
.video
.i_width
) & 15;
1382 ** SSE2 128 bits fetch/store instructions are faster
1383 ** if memory access is 16 bytes aligned
1386 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
/* Alignment test: the low four bits of the OR-ed operands must all be zero
 * for the aligned path to be taken */
1387 if( 0 == (15 & (p_src
->p
[Y_PLANE
].i_pitch
|
1390 ((intptr_t)p_buffer
))) )
1392 /* use faster SSE2 aligned fetch and store */
1393 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
/* Remember where the current destination line starts */
1395 p_pic_start
= p_pic
;
1397 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 16; i_x
--; )
1400 SSE2_INIT_32_ALIGNED
1403 SSE2_UNPACK_32_BGRA_ALIGNED
1411 /* Here we do some unaligned reads and duplicate conversions, but
1412 * at least we have all the pixels */
/* Step the chroma pointers back by half the rewind amount and the output
 * pointer by the full amount; the trailing group uses the unaligned macros */
1416 p_u
-= i_rewind
>> 1;
1417 p_v
-= i_rewind
>> 1;
1418 p_buffer
-= i_rewind
;
1420 SSE2_INIT_32_UNALIGNED
1423 SSE2_UNPACK_32_BGRA_UNALIGNED
/* SCALE_HEIGHT is a macro defined outside this listing */
1430 SCALE_HEIGHT( 420, 4 );
/* Skip the padding at the end of the source lines */
1432 p_y
+= i_source_margin
;
1435 p_u
+= i_source_margin_c
;
1436 p_v
+= i_source_margin_c
;
/* Re-select the conversion target for the next line */
1438 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1443 /* use slower SSE2 unaligned fetch and store */
1444 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
1446 p_pic_start
= p_pic
;
1447 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1449 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 16; i_x
--; )
1452 SSE2_INIT_32_UNALIGNED
1455 SSE2_UNPACK_32_BGRA_UNALIGNED
1463 /* Here we do some unaligned reads and duplicate conversions, but
1464 * at least we have all the pixels */
1468 p_u
-= i_rewind
>> 1;
1469 p_v
-= i_rewind
>> 1;
1470 p_buffer
-= i_rewind
;
1472 SSE2_INIT_32_UNALIGNED
1475 SSE2_UNPACK_32_BGRA_UNALIGNED
1482 SCALE_HEIGHT( 420, 4 );
1484 p_y
+= i_source_margin
;
1487 p_u
+= i_source_margin_c
;
1488 p_v
+= i_source_margin_c
;
1490 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
/* NOTE(review): no #else marker is visible before this point in this
 * listing; the modulo 8 rewind and width / 8 loop below presumably form the
 * MMX branch - confirm against the complete file. */
1496 i_rewind
= (-p_filter
->fmt_in
.video
.i_width
) & 7;
1498 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
1500 p_pic_start
= p_pic
;
1501 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1503 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 8; i_x
--; )
1517 /* Here we do some unaligned reads and duplicate conversions, but
1518 * at least we have all the pixels */
1522 p_u
-= i_rewind
>> 1;
1523 p_v
-= i_rewind
>> 1;
1524 p_buffer
-= i_rewind
;
1537 SCALE_HEIGHT( 420, 4 );
1539 p_y
+= i_source_margin
;
1542 p_u
+= i_source_margin_c
;
1543 p_v
+= i_source_margin_c
;
1547 /* re-enable FPU registers */
/*****************************************************************************
 * I420_A8B8G8R8: convert an I420 source picture to 32-bit pixels
 *****************************************************************************
 * Reads the Y, U and V planes of p_src and produces uint32_t pixels for the
 * first plane of p_dest. The SSE2 path uses the ABGR unpack macros.
 * Conversion targets either the intermediate buffer held in p_filter->p_sys
 * (when b_hscale is set) or the destination picture directly.
 *
 * p_filter: supplies the input and output video dimensions (fmt_in, fmt_out)
 *           and, through p_sys, the conversion buffer and the offset array
 * p_src:    source picture
 * p_dest:   destination picture
 *****************************************************************************/
1554 void I420_A8B8G8R8( filter_t
*p_filter
, picture_t
*p_src
, picture_t
*p_dest
)
1556 /* We got this one from the old arguments */
1557 uint32_t *p_pic
= (uint32_t*)p_dest
->p
->p_pixels
;
1558 uint8_t *p_y
= p_src
->Y_PIXELS
;
1559 uint8_t *p_u
= p_src
->U_PIXELS
;
1560 uint8_t *p_v
= p_src
->V_PIXELS
;
1562 bool b_hscale
; /* horizontal scaling type */
1563 unsigned int i_vscale
; /* vertical scaling type */
1564 unsigned int i_x
, i_y
; /* horizontal and vertical indexes */
1568 int i_scale_count
; /* scale modulo counter */
1569 int i_chroma_width
= p_filter
->fmt_in
.video
.i_width
/ 2; /* chroma width */
1570 uint32_t * p_pic_start
; /* beginning of the current line for copy */
1571 /* Conversion buffer pointer */
1572 uint32_t * p_buffer_start
= (uint32_t*)p_filter
->p_sys
->p_buffer
;
1573 uint32_t * p_buffer
;
1575 /* Offset array pointer */
1576 int * p_offset_start
= p_filter
->p_sys
->p_offset
;
/* Difference between pitch and visible pitch for source plane 0 and source
 * plane 1; these are added to p_y and to p_u / p_v respectively further
 * down */
1579 const int i_source_margin
= p_src
->p
[0].i_pitch
1580 - p_src
->p
[0].i_visible_pitch
;
1581 const int i_source_margin_c
= p_src
->p
[1].i_pitch
1582 - p_src
->p
[1].i_visible_pitch
;
/* Same pitch difference, computed for the destination plane */
1584 i_right_margin
= p_dest
->p
->i_pitch
- p_dest
->p
->i_visible_pitch
;
1586 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
1587 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
1588 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
/* SetOffset fills the offset array and sets b_hscale and i_vscale from the
 * input and output dimensions */
1589 SetOffset( p_filter
->fmt_in
.video
.i_width
,
1590 p_filter
->fmt_in
.video
.i_height
,
1591 p_filter
->fmt_out
.video
.i_width
,
1592 p_filter
->fmt_out
.video
.i_height
,
1593 &b_hscale
, &i_vscale
, p_offset_start
);
1596 * Perform conversion
/* The scale counter starts at the output height when i_vscale is 1 and at
 * the input height otherwise */
1598 i_scale_count
= ( i_vscale
== 1 ) ?
1599 p_filter
->fmt_out
.video
.i_height
:
1600 p_filter
->fmt_in
.video
.i_height
;
1602 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
/* (-width) & 15 is the number of pixels missing to reach the next multiple
 * of 16; it is zero when the width already is a multiple of 16 */
1604 i_rewind
= (-p_filter
->fmt_in
.video
.i_width
) & 15;
1607 ** SSE2 128 bits fetch/store instructions are faster
1608 ** if memory access is 16 bytes aligned
1611 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
/* The test passes only when the low four bits of every OR-ed operand are
 * zero, i.e. each one is a multiple of 16 */
1612 if( 0 == (15 & (p_src
->p
[Y_PLANE
].i_pitch
|
1615 ((intptr_t)p_buffer
))) )
1617 /* use faster SSE2 aligned fetch and store */
1618 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
1620 p_pic_start
= p_pic
;
/* Runs width / 16 times, counting i_x down to zero */
1622 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 16; i_x
--; )
1625 SSE2_INIT_32_ALIGNED
1628 SSE2_UNPACK_32_ABGR_ALIGNED
1636 /* Here we do some unaligned reads and duplicate conversions, but
1637 * at least we have all the pixels */
/* Chroma pointers step back by half of i_rewind and the output pointer by
 * i_rewind, then one more conversion is done with the unaligned macros */
1641 p_u
-= i_rewind
>> 1;
1642 p_v
-= i_rewind
>> 1;
1643 p_buffer
-= i_rewind
;
1645 SSE2_INIT_32_UNALIGNED
1648 SSE2_UNPACK_32_ABGR_UNALIGNED
1655 SCALE_HEIGHT( 420, 4 );
/* Advance the luma pointer by the plane 0 margin and the chroma pointers by
 * the plane 1 margin */
1657 p_y
+= i_source_margin
;
1660 p_u
+= i_source_margin_c
;
1661 p_v
+= i_source_margin_c
;
1663 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1668 /* use slower SSE2 unaligned fetch and store */
1669 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
1671 p_pic_start
= p_pic
;
1672 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1674 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 16; i_x
--; )
1677 SSE2_INIT_32_UNALIGNED
1680 SSE2_UNPACK_32_ABGR_UNALIGNED
1688 /* Here we do some unaligned reads and duplicate conversions, but
1689 * at least we have all the pixels */
1693 p_u
-= i_rewind
>> 1;
1694 p_v
-= i_rewind
>> 1;
1695 p_buffer
-= i_rewind
;
1697 SSE2_INIT_32_UNALIGNED
1700 SSE2_UNPACK_32_ABGR_UNALIGNED
1707 SCALE_HEIGHT( 420, 4 );
1709 p_y
+= i_source_margin
;
1712 p_u
+= i_source_margin_c
;
1713 p_v
+= i_source_margin_c
;
1715 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
/* NOTE(review): presumably the start of the non-SSE2 branch; confirm against
 * the preprocessor directives. Same scheme as above with groups of 8 pixels:
 * the rewind is (-width) & 7 and the inner loop runs width / 8 times */
1721 i_rewind
= (-p_filter
->fmt_in
.video
.i_width
) & 7;
1723 for( i_y
= 0; i_y
< p_filter
->fmt_in
.video
.i_height
; i_y
++ )
1725 p_pic_start
= p_pic
;
1726 p_buffer
= b_hscale
? p_buffer_start
: p_pic
;
1728 for ( i_x
= p_filter
->fmt_in
.video
.i_width
/ 8; i_x
--; )
1742 /* Here we do some unaligned reads and duplicate conversions, but
1743 * at least we have all the pixels */
1747 p_u
-= i_rewind
>> 1;
1748 p_v
-= i_rewind
>> 1;
1749 p_buffer
-= i_rewind
;
1762 SCALE_HEIGHT( 420, 4 );
1764 p_y
+= i_source_margin
;
1767 p_u
+= i_source_margin_c
;
1768 p_v
+= i_source_margin_c
;
1772 /* re-enable FPU registers */
1780 /* Following functions are local */
1782 /*****************************************************************************
1783 * SetOffset: build offset array for conversion functions
1784 *****************************************************************************
1785 * This function will build an offset array used in later conversion functions.
1786 * It will also set horizontal and vertical scaling indicators.
1787 *****************************************************************************/
1788 static void SetOffset( int i_width
, int i_height
, int i_pic_width
,
1789 int i_pic_height
, bool *pb_hscale
,
1790 unsigned int *pi_vscale
, int *p_offset
)
1792 int i_x
; /* x position in destination */
1793 int i_scale_count
; /* modulo counter */
1796 * Prepare horizontal offset array
1798 if( i_pic_width
- i_width
== 0 )
1800 /* No horizontal scaling: YUV conversion is done directly to picture */
1803 else if( i_pic_width
- i_width
> 0 )
1805 /* Prepare scaling array for horizontal extension */
1807 i_scale_count
= i_pic_width
;
1808 for( i_x
= i_width
; i_x
--; )
1810 while( (i_scale_count
-= i_width
) > 0 )
1815 i_scale_count
+= i_pic_width
;
1818 else /* if( i_pic_width - i_width < 0 ) */
1820 /* Prepare scaling array for horizontal reduction */
1822 i_scale_count
= i_width
;
1823 for( i_x
= i_pic_width
; i_x
--; )
1826 while( (i_scale_count
-= i_pic_width
) > 0 )
1831 i_scale_count
+= i_width
;
1836 * Set vertical scaling indicator
1838 if( i_pic_height
- i_height
== 0 )
1842 else if( i_pic_height
- i_height
> 0 )
1846 else /* if( i_pic_height - i_height < 0 ) */