Typos
[vlc/asuraparaju-public.git] / modules / video_chroma / i420_rgb16.c
blobb61f9e1b17512fb09eda408ed927c12b75574da0
1 /*****************************************************************************
2 * i420_rgb16.c : YUV to bitmap RGB conversion module for vlc
3 *****************************************************************************
4 * Copyright (C) 2000 the VideoLAN team
5 * $Id$
7 * Authors: Samuel Hocevar <sam@zoy.org>
8 * Damien Fouilleul <damienf@videolan.org>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
23 *****************************************************************************/
25 /*****************************************************************************
26 * Preamble
27 *****************************************************************************/
29 #ifdef HAVE_CONFIG_H
30 # include "config.h"
31 #endif
33 #include <vlc_common.h>
34 #include <vlc_filter.h>
36 #include "i420_rgb.h"
37 #if defined (MODULE_NAME_IS_i420_rgb)
38 # include "i420_rgb_c.h"
39 #elif defined (MODULE_NAME_IS_i420_rgb_mmx)
40 # include "../mmx/i420_rgb_mmx.h"
41 #elif defined (MODULE_NAME_IS_i420_rgb_sse2)
42 # include "../mmx/i420_rgb_mmx.h"
43 #endif
45 static void SetOffset( int, int, int, int, bool *,
46 unsigned int *, int * );
48 #if defined (MODULE_NAME_IS_i420_rgb)
49 /*****************************************************************************
50 * I420_RGB16_dither: color YUV 4:2:0 to RGB 16 bpp with dithering
51 *****************************************************************************
52 * Horizontal alignment needed:
53 * - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
54 * - output: 1 pixel (2 bytes), margins allowed
55 * Vertical alignment needed:
56 * - input: 2 lines (2 Y lines, 1 U/V line)
57 * - output: 1 line
58 *****************************************************************************/
/* Convert the Y/U/V planes of p_src into 16-bit pixels written through
 * p_dest->p->p_pixels, one source line per outer-loop iteration, applying
 * one of four 4-entry dither matrices to each converted pixel.
 *
 * p_filter supplies the input/output dimensions, the output red shift
 * (fmt_out.video.i_rrshift) and, through p_sys, the p_rgb16 lookup table,
 * the scratch line buffer (p_buffer) and the offset array (p_offset).
 * Nothing is returned; all results are stored through p_dest.
 *
 * NOTE(review): the CONVERT_*_DITHER, SCALE_WIDTH and SCALE_HEIGHT macros
 * are defined outside this section and appear to use the locals below by
 * name -- confirm in the i420_rgb headers before renaming or reordering. */
59 void I420_RGB16_dither( filter_t *p_filter, picture_t *p_src,
60 picture_t *p_dest )
62 /* We got this one from the old arguments */
63 uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
64 uint8_t *p_y = p_src->Y_PIXELS;
65 uint8_t *p_u = p_src->U_PIXELS;
66 uint8_t *p_v = p_src->V_PIXELS;
68 bool b_hscale; /* horizontal scaling type */
69 unsigned int i_vscale; /* vertical scaling type */
70 unsigned int i_x, i_y; /* horizontal and vertical indexes */
71 unsigned int i_real_y; /* y % 4 */
73 int i_right_margin;
74 int i_rewind;
75 int i_scale_count; /* scale modulo counter */
76 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
77 uint16_t * p_pic_start; /* beginning of the current line for copy */
78 int i_uval, i_vval; /* U and V samples */
79 int i_red, i_green, i_blue; /* U and V modified samples */
80 uint16_t * p_yuv = p_filter->p_sys->p_rgb16;
81 uint16_t * p_ybase; /* Y dependent conversion table */
83 /* Conversion buffer pointer */
84 uint16_t * p_buffer_start = (uint16_t*)p_filter->p_sys->p_buffer;
85 uint16_t * p_buffer;
87 /* Offset array pointer */
88 int * p_offset_start = p_filter->p_sys->p_offset;
89 int * p_offset;
/* Bytes of padding after the visible part of a luma / chroma source line */
91 const int i_source_margin = p_src->p[0].i_pitch
92 - p_src->p[0].i_visible_pitch;
93 const int i_source_margin_c = p_src->p[1].i_pitch
94 - p_src->p[1].i_visible_pitch;
96 /* The dithering matrices */
97 int dither10[4] = { 0x0, 0x8, 0x2, 0xa };
98 int dither11[4] = { 0xc, 0x4, 0xe, 0x6 };
99 int dither12[4] = { 0x3, 0xb, 0x1, 0x9 };
100 int dither13[4] = { 0xf, 0x7, 0xd, 0x5 };
/* Pre-shift every matrix entry once by (SHIFT - 4 + i_rrshift) so the
 * per-pixel macros can use the values directly */
102 for(i_x = 0; i_x < 4; i_x++)
104 dither10[i_x] = dither10[i_x] << (SHIFT - 4 + p_filter->fmt_out.video.i_rrshift);
105 dither11[i_x] = dither11[i_x] << (SHIFT - 4 + p_filter->fmt_out.video.i_rrshift);
106 dither12[i_x] = dither12[i_x] << (SHIFT - 4 + p_filter->fmt_out.video.i_rrshift);
107 dither13[i_x] = dither13[i_x] << (SHIFT - 4 + p_filter->fmt_out.video.i_rrshift);
/* Bytes of padding after the visible part of a destination line */
110 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
/* When the width is not a multiple of 8, i_rewind is how many pixels the
 * final 8-pixel group has to step back so that it ends on the last pixel */
112 if( p_filter->fmt_in.video.i_width & 7 )
114 i_rewind = 8 - ( p_filter->fmt_in.video.i_width & 7 );
116 else
118 i_rewind = 0;
121 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
122 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
123 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
/* SetOffset receives both geometries and fills b_hscale, i_vscale and the
 * offset array; its definition is not part of this section */
124 SetOffset( p_filter->fmt_in.video.i_width,
125 p_filter->fmt_in.video.i_height,
126 p_filter->fmt_out.video.i_width,
127 p_filter->fmt_out.video.i_height,
128 &b_hscale, &i_vscale, p_offset_start );
131 * Perform conversion
133 i_scale_count = ( i_vscale == 1 ) ?
134 p_filter->fmt_out.video.i_height :
135 p_filter->fmt_in.video.i_height;
136 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
/* i_real_y is not read by any statement visible here; presumably it indexes
 * p_dither inside the *_DITHER macros -- TODO confirm */
138 i_real_y = i_y & 0x3;
139 p_pic_start = p_pic;
/* With horizontal scaling the line is converted into the scratch buffer,
 * otherwise straight into the destination line */
140 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* One iteration per full group of 8 pixels; the four matrices are cycled
 * dither10..dither13 twice within each group */
142 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
144 int *p_dither = dither10;
145 CONVERT_YUV_PIXEL_DITHER(2);
146 p_dither = dither11;
147 CONVERT_Y_PIXEL_DITHER(2);
148 p_dither = dither12;
149 CONVERT_YUV_PIXEL_DITHER(2);
150 p_dither = dither13;
151 CONVERT_Y_PIXEL_DITHER(2);
152 p_dither = dither10;
153 CONVERT_YUV_PIXEL_DITHER(2);
154 p_dither = dither11;
155 CONVERT_Y_PIXEL_DITHER(2);
156 p_dither = dither12;
157 CONVERT_YUV_PIXEL_DITHER(2);
158 p_dither = dither13;
159 CONVERT_Y_PIXEL_DITHER(2);
162 /* Here we do some unaligned reads and duplicate conversions, but
163 * at least we have all the pixels */
164 if( i_rewind )
166 int *p_dither = dither10;
/* Step back i_rewind luma samples / output pixels and half as many chroma
 * samples, then convert one more overlapping group of 8 */
167 p_y -= i_rewind;
168 p_u -= i_rewind >> 1;
169 p_v -= i_rewind >> 1;
170 p_buffer -= i_rewind;
171 CONVERT_YUV_PIXEL_DITHER(2);
172 p_dither = dither11;
173 CONVERT_Y_PIXEL_DITHER(2);
174 p_dither = dither12;
175 CONVERT_YUV_PIXEL_DITHER(2);
176 p_dither = dither13;
177 CONVERT_Y_PIXEL_DITHER(2);
178 p_dither = dither10;
179 CONVERT_YUV_PIXEL_DITHER(2);
180 p_dither = dither11;
181 CONVERT_Y_PIXEL_DITHER(2);
182 p_dither = dither12;
183 CONVERT_YUV_PIXEL_DITHER(2);
184 p_dither = dither13;
185 CONVERT_Y_PIXEL_DITHER(2);
187 SCALE_WIDTH;
188 SCALE_HEIGHT( 420, 2 );
/* Skip the luma padding after every line; the chroma padding is skipped
 * only after odd-numbered lines (i_y % 2) */
190 p_y += i_source_margin;
191 if( i_y % 2 )
193 p_u += i_source_margin_c;
194 p_v += i_source_margin_c;
198 #endif
200 /*****************************************************************************
201 * I420_RGB16: color YUV 4:2:0 to RGB 16 bpp
202 *****************************************************************************
203 * Horizontal alignment needed:
204 * - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
205 * - output: 1 pixel (2 bytes), margins allowed
206 * Vertical alignment needed:
207 * - input: 2 lines (2 Y lines, 1 U/V line)
208 * - output: 1 line
209 *****************************************************************************/
211 #if defined (MODULE_NAME_IS_i420_rgb)
/* Convert the Y/U/V planes of p_src into 16-bit pixels written through
 * p_dest->p->p_pixels, one source line per outer-loop iteration, without
 * dithering.
 *
 * p_filter supplies the input/output dimensions and, through p_sys, the
 * p_rgb16 lookup table, the scratch line buffer (p_buffer) and the offset
 * array (p_offset). Nothing is returned.
 *
 * NOTE(review): CONVERT_YUV_PIXEL, CONVERT_Y_PIXEL, SCALE_WIDTH and
 * SCALE_HEIGHT are defined outside this section and appear to use the
 * locals below by name -- confirm before renaming or reordering. */
213 void I420_RGB16( filter_t *p_filter, picture_t *p_src,
214 picture_t *p_dest )
216 /* We got this one from the old arguments */
217 uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
218 uint8_t *p_y = p_src->Y_PIXELS;
219 uint8_t *p_u = p_src->U_PIXELS;
220 uint8_t *p_v = p_src->V_PIXELS;
222 bool b_hscale; /* horizontal scaling type */
223 unsigned int i_vscale; /* vertical scaling type */
224 unsigned int i_x, i_y; /* horizontal and vertical indexes */
226 int i_right_margin;
227 int i_rewind;
228 int i_scale_count; /* scale modulo counter */
229 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
230 uint16_t * p_pic_start; /* beginning of the current line for copy */
231 int i_uval, i_vval; /* U and V samples */
232 int i_red, i_green, i_blue; /* U and V modified samples */
233 uint16_t * p_yuv = p_filter->p_sys->p_rgb16;
234 uint16_t * p_ybase; /* Y dependent conversion table */
236 /* Conversion buffer pointer */
237 uint16_t * p_buffer_start = (uint16_t*)p_filter->p_sys->p_buffer;
238 uint16_t * p_buffer;
240 /* Offset array pointer */
241 int * p_offset_start = p_filter->p_sys->p_offset;
242 int * p_offset;
/* Bytes of padding after the visible part of a luma / chroma source line */
244 const int i_source_margin = p_src->p[0].i_pitch
245 - p_src->p[0].i_visible_pitch;
246 const int i_source_margin_c = p_src->p[1].i_pitch
247 - p_src->p[1].i_visible_pitch;
/* Bytes of padding after the visible part of a destination line */
249 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
/* When the width is not a multiple of 8, i_rewind is how many pixels the
 * final 8-pixel group has to step back so that it ends on the last pixel */
251 if( p_filter->fmt_in.video.i_width & 7 )
253 i_rewind = 8 - ( p_filter->fmt_in.video.i_width & 7 );
255 else
257 i_rewind = 0;
260 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
261 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
262 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
263 SetOffset( p_filter->fmt_in.video.i_width,
264 p_filter->fmt_in.video.i_height,
265 p_filter->fmt_out.video.i_width,
266 p_filter->fmt_out.video.i_height,
267 &b_hscale, &i_vscale, p_offset_start );
270 * Perform conversion
272 i_scale_count = ( i_vscale == 1 ) ?
273 p_filter->fmt_out.video.i_height :
274 p_filter->fmt_in.video.i_height;
275 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
277 p_pic_start = p_pic;
/* With horizontal scaling the line is converted into the scratch buffer,
 * otherwise straight into the destination line */
278 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* One iteration per full group of 8 pixels (four YUV/Y macro pairs) */
280 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
282 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
283 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
284 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
285 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
288 /* Here we do some unaligned reads and duplicate conversions, but
289 * at least we have all the pixels */
290 if( i_rewind )
/* Step back i_rewind luma samples / output pixels and half as many chroma
 * samples, then convert one more overlapping group of 8 */
292 p_y -= i_rewind;
293 p_u -= i_rewind >> 1;
294 p_v -= i_rewind >> 1;
295 p_buffer -= i_rewind;
297 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
298 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
299 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
300 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
302 SCALE_WIDTH;
303 SCALE_HEIGHT( 420, 2 );
/* Skip the luma padding after every line; the chroma padding is skipped
 * only after odd-numbered lines (i_y % 2) */
305 p_y += i_source_margin;
306 if( i_y % 2 )
308 p_u += i_source_margin_c;
309 p_v += i_source_margin_c;
314 #else // ! defined (MODULE_NAME_IS_i420_rgb)
/* Convert the Y/U/V planes of p_src into 16-bit pixels written through
 * p_dest->p->p_pixels using the SSE2 or MMX macro sequences ending in the
 * *_UNPACK_15* step. Which variant is compiled is selected by
 * MODULE_NAME_IS_i420_rgb_sse2; the other branch is the MMX one.
 *
 * p_filter supplies the input/output dimensions and, through p_sys, the
 * scratch line buffer (p_buffer) and the offset array (p_offset).
 * Nothing is returned.
 *
 * NOTE(review): the SSE2_*, MMX_*, SCALE_WIDTH and SCALE_HEIGHT macros are
 * defined outside this section and appear to use the locals below by name
 * -- confirm before renaming or reordering. */
316 void I420_R5G5B5( filter_t *p_filter, picture_t *p_src,
317 picture_t *p_dest )
319 /* We got this one from the old arguments */
320 uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
321 uint8_t *p_y = p_src->Y_PIXELS;
322 uint8_t *p_u = p_src->U_PIXELS;
323 uint8_t *p_v = p_src->V_PIXELS;
325 bool b_hscale; /* horizontal scaling type */
326 unsigned int i_vscale; /* vertical scaling type */
327 unsigned int i_x, i_y; /* horizontal and vertical indexes */
329 int i_right_margin;
330 int i_rewind;
331 int i_scale_count; /* scale modulo counter */
332 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
333 uint16_t * p_pic_start; /* beginning of the current line for copy */
335 /* Conversion buffer pointer */
336 uint16_t * p_buffer_start = (uint16_t*)p_filter->p_sys->p_buffer;
337 uint16_t * p_buffer;
339 /* Offset array pointer */
340 int * p_offset_start = p_filter->p_sys->p_offset;
341 int * p_offset;
/* Bytes of padding after the visible part of a luma / chroma source line */
343 const int i_source_margin = p_src->p[0].i_pitch
344 - p_src->p[0].i_visible_pitch;
345 const int i_source_margin_c = p_src->p[1].i_pitch
346 - p_src->p[1].i_visible_pitch;
/* Bytes of padding after the visible part of a destination line */
348 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
350 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
351 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
352 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
353 SetOffset( p_filter->fmt_in.video.i_width,
354 p_filter->fmt_in.video.i_height,
355 p_filter->fmt_out.video.i_width,
356 p_filter->fmt_out.video.i_height,
357 &b_hscale, &i_vscale, p_offset_start );
361 * Perform conversion
363 i_scale_count = ( i_vscale == 1 ) ?
364 p_filter->fmt_out.video.i_height :
365 p_filter->fmt_in.video.i_height;
367 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
/* SSE2 works on groups of 16 pixels: i_rewind is how many pixels the final
 * group has to step back when the width is not a multiple of 16 */
369 if( p_filter->fmt_in.video.i_width & 15 )
371 i_rewind = 16 - ( p_filter->fmt_in.video.i_width & 15 );
373 else
375 i_rewind = 0;
379 ** SSE2 128 bits fetch/store instructions are faster
380 ** if memory access is 16 bytes aligned
383 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* The aligned path is taken only when the Y pitch, the destination pitch,
 * p_y and p_buffer all have their low four bits clear; the test is made
 * once, before the first line */
384 if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
385 p_dest->p->i_pitch|
386 ((intptr_t)p_y)|
387 ((intptr_t)p_buffer))) )
389 /* use faster SSE2 aligned fetch and store */
390 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
392 p_pic_start = p_pic;
394 for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
396 SSE2_CALL (
397 SSE2_INIT_16_ALIGNED
398 SSE2_YUV_MUL
399 SSE2_YUV_ADD
400 SSE2_UNPACK_15_ALIGNED
/* 16 pixels consumed: 16 luma samples, 8 samples from each chroma plane */
402 p_y += 16;
403 p_u += 8;
404 p_v += 8;
405 p_buffer += 16;
407 /* Here we do some unaligned reads and duplicate conversions, but
408 * at least we have all the pixels */
409 if( i_rewind )
/* The rewound group uses the UNALIGNED macro variants */
411 p_y -= i_rewind;
412 p_u -= i_rewind >> 1;
413 p_v -= i_rewind >> 1;
414 p_buffer -= i_rewind;
416 SSE2_CALL (
417 SSE2_INIT_16_UNALIGNED
418 SSE2_YUV_MUL
419 SSE2_YUV_ADD
420 SSE2_UNPACK_15_UNALIGNED
422 p_y += 16;
423 p_u += 8;
424 p_v += 8;
426 SCALE_WIDTH;
427 SCALE_HEIGHT( 420, 2 );
/* Skip the luma padding after every line; the chroma padding is skipped
 * only after odd-numbered lines (i_y % 2) */
429 p_y += i_source_margin;
430 if( i_y % 2 )
432 p_u += i_source_margin_c;
433 p_v += i_source_margin_c;
/* Re-select the conversion target for the next line */
435 p_buffer = b_hscale ? p_buffer_start : p_pic;
438 else
440 /* use slower SSE2 unaligned fetch and store */
441 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
443 p_pic_start = p_pic;
444 p_buffer = b_hscale ? p_buffer_start : p_pic;
446 for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
448 SSE2_CALL (
449 SSE2_INIT_16_UNALIGNED
450 SSE2_YUV_MUL
451 SSE2_YUV_ADD
452 SSE2_UNPACK_15_UNALIGNED
454 p_y += 16;
455 p_u += 8;
456 p_v += 8;
457 p_buffer += 16;
459 /* Here we do some unaligned reads and duplicate conversions, but
460 * at least we have all the pixels */
461 if( i_rewind )
463 p_y -= i_rewind;
464 p_u -= i_rewind >> 1;
465 p_v -= i_rewind >> 1;
466 p_buffer -= i_rewind;
468 SSE2_CALL (
469 SSE2_INIT_16_UNALIGNED
470 SSE2_YUV_MUL
471 SSE2_YUV_ADD
472 SSE2_UNPACK_15_UNALIGNED
474 p_y += 16;
475 p_u += 8;
476 p_v += 8;
478 SCALE_WIDTH;
479 SCALE_HEIGHT( 420, 2 );
481 p_y += i_source_margin;
482 if( i_y % 2 )
484 p_u += i_source_margin_c;
485 p_v += i_source_margin_c;
487 p_buffer = b_hscale ? p_buffer_start : p_pic;
491 /* make sure all SSE2 stores are visible thereafter */
492 SSE2_END;
494 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
/* MMX works on groups of 8 pixels: i_rewind is how many pixels the final
 * group has to step back when the width is not a multiple of 8 */
496 if( p_filter->fmt_in.video.i_width & 7 )
498 i_rewind = 8 - ( p_filter->fmt_in.video.i_width & 7 );
500 else
502 i_rewind = 0;
505 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
507 p_pic_start = p_pic;
508 p_buffer = b_hscale ? p_buffer_start : p_pic;
510 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
512 MMX_CALL (
513 MMX_INIT_16
514 MMX_YUV_MUL
515 MMX_YUV_ADD
516 MMX_UNPACK_15
/* 8 pixels consumed: 8 luma samples, 4 samples from each chroma plane */
518 p_y += 8;
519 p_u += 4;
520 p_v += 4;
521 p_buffer += 8;
524 /* Here we do some unaligned reads and duplicate conversions, but
525 * at least we have all the pixels */
526 if( i_rewind )
528 p_y -= i_rewind;
529 p_u -= i_rewind >> 1;
530 p_v -= i_rewind >> 1;
531 p_buffer -= i_rewind;
533 MMX_CALL (
534 MMX_INIT_16
535 MMX_YUV_MUL
536 MMX_YUV_ADD
537 MMX_UNPACK_15
539 p_y += 8;
540 p_u += 4;
541 p_v += 4;
542 p_buffer += 8;
544 SCALE_WIDTH;
545 SCALE_HEIGHT( 420, 2 );
547 p_y += i_source_margin;
548 if( i_y % 2 )
550 p_u += i_source_margin_c;
551 p_v += i_source_margin_c;
554 /* re-enable FPU registers */
555 MMX_END;
557 #endif
/* Convert the Y/U/V planes of p_src into 16-bit pixels written through
 * p_dest->p->p_pixels using the SSE2 or MMX macro sequences ending in the
 * *_UNPACK_16* step. Which variant is compiled is selected by
 * MODULE_NAME_IS_i420_rgb_sse2; the other branch is the MMX one.
 *
 * p_filter supplies the input/output dimensions and, through p_sys, the
 * scratch line buffer (p_buffer) and the offset array (p_offset).
 * Nothing is returned.
 *
 * NOTE(review): the SSE2_*, MMX_*, SCALE_WIDTH and SCALE_HEIGHT macros are
 * defined outside this section and appear to use the locals below by name
 * -- confirm before renaming or reordering. */
560 void I420_R5G6B5( filter_t *p_filter, picture_t *p_src,
561 picture_t *p_dest )
563 /* We got this one from the old arguments */
564 uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
565 uint8_t *p_y = p_src->Y_PIXELS;
566 uint8_t *p_u = p_src->U_PIXELS;
567 uint8_t *p_v = p_src->V_PIXELS;
569 bool b_hscale; /* horizontal scaling type */
570 unsigned int i_vscale; /* vertical scaling type */
571 unsigned int i_x, i_y; /* horizontal and vertical indexes */
573 int i_right_margin;
574 int i_rewind;
575 int i_scale_count; /* scale modulo counter */
576 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
577 uint16_t * p_pic_start; /* beginning of the current line for copy */
579 /* Conversion buffer pointer */
580 uint16_t * p_buffer_start = (uint16_t*)p_filter->p_sys->p_buffer;
581 uint16_t * p_buffer;
583 /* Offset array pointer */
584 int * p_offset_start = p_filter->p_sys->p_offset;
585 int * p_offset;
/* Bytes of padding after the visible part of a luma / chroma source line */
587 const int i_source_margin = p_src->p[0].i_pitch
588 - p_src->p[0].i_visible_pitch;
589 const int i_source_margin_c = p_src->p[1].i_pitch
590 - p_src->p[1].i_visible_pitch;
/* Bytes of padding after the visible part of a destination line */
592 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
594 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
595 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
596 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
597 SetOffset( p_filter->fmt_in.video.i_width,
598 p_filter->fmt_in.video.i_height,
599 p_filter->fmt_out.video.i_width,
600 p_filter->fmt_out.video.i_height,
601 &b_hscale, &i_vscale, p_offset_start );
605 * Perform conversion
607 i_scale_count = ( i_vscale == 1 ) ?
608 p_filter->fmt_out.video.i_height :
609 p_filter->fmt_in.video.i_height;
611 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
/* SSE2 works on groups of 16 pixels: i_rewind is how many pixels the final
 * group has to step back when the width is not a multiple of 16 */
613 if( p_filter->fmt_in.video.i_width & 15 )
615 i_rewind = 16 - ( p_filter->fmt_in.video.i_width & 15 );
617 else
619 i_rewind = 0;
623 ** SSE2 128 bits fetch/store instructions are faster
624 ** if memory access is 16 bytes aligned
627 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* The aligned path is taken only when the Y pitch, the destination pitch,
 * p_y and p_buffer all have their low four bits clear; the test is made
 * once, before the first line */
628 if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
629 p_dest->p->i_pitch|
630 ((intptr_t)p_y)|
631 ((intptr_t)p_buffer))) )
633 /* use faster SSE2 aligned fetch and store */
634 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
636 p_pic_start = p_pic;
638 for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
640 SSE2_CALL (
641 SSE2_INIT_16_ALIGNED
642 SSE2_YUV_MUL
643 SSE2_YUV_ADD
644 SSE2_UNPACK_16_ALIGNED
/* 16 pixels consumed: 16 luma samples, 8 samples from each chroma plane */
646 p_y += 16;
647 p_u += 8;
648 p_v += 8;
649 p_buffer += 16;
651 /* Here we do some unaligned reads and duplicate conversions, but
652 * at least we have all the pixels */
653 if( i_rewind )
/* The rewound group uses the UNALIGNED macro variants */
655 p_y -= i_rewind;
656 p_u -= i_rewind >> 1;
657 p_v -= i_rewind >> 1;
658 p_buffer -= i_rewind;
660 SSE2_CALL (
661 SSE2_INIT_16_UNALIGNED
662 SSE2_YUV_MUL
663 SSE2_YUV_ADD
664 SSE2_UNPACK_16_UNALIGNED
666 p_y += 16;
667 p_u += 8;
668 p_v += 8;
670 SCALE_WIDTH;
671 SCALE_HEIGHT( 420, 2 );
/* Skip the luma padding after every line; the chroma padding is skipped
 * only after odd-numbered lines (i_y % 2) */
673 p_y += i_source_margin;
674 if( i_y % 2 )
676 p_u += i_source_margin_c;
677 p_v += i_source_margin_c;
/* Re-select the conversion target for the next line */
679 p_buffer = b_hscale ? p_buffer_start : p_pic;
682 else
684 /* use slower SSE2 unaligned fetch and store */
685 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
687 p_pic_start = p_pic;
688 p_buffer = b_hscale ? p_buffer_start : p_pic;
690 for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
692 SSE2_CALL(
693 SSE2_INIT_16_UNALIGNED
694 SSE2_YUV_MUL
695 SSE2_YUV_ADD
696 SSE2_UNPACK_16_UNALIGNED
698 p_y += 16;
699 p_u += 8;
700 p_v += 8;
701 p_buffer += 16;
703 /* Here we do some unaligned reads and duplicate conversions, but
704 * at least we have all the pixels */
705 if( i_rewind )
707 p_y -= i_rewind;
708 p_u -= i_rewind >> 1;
709 p_v -= i_rewind >> 1;
710 p_buffer -= i_rewind;
712 SSE2_CALL(
713 SSE2_INIT_16_UNALIGNED
714 SSE2_YUV_MUL
715 SSE2_YUV_ADD
716 SSE2_UNPACK_16_UNALIGNED
718 p_y += 16;
719 p_u += 8;
720 p_v += 8;
722 SCALE_WIDTH;
723 SCALE_HEIGHT( 420, 2 );
725 p_y += i_source_margin;
726 if( i_y % 2 )
728 p_u += i_source_margin_c;
729 p_v += i_source_margin_c;
731 p_buffer = b_hscale ? p_buffer_start : p_pic;
735 /* make sure all SSE2 stores are visible thereafter */
736 SSE2_END;
738 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
/* MMX works on groups of 8 pixels: i_rewind is how many pixels the final
 * group has to step back when the width is not a multiple of 8 */
740 if( p_filter->fmt_in.video.i_width & 7 )
742 i_rewind = 8 - ( p_filter->fmt_in.video.i_width & 7 );
744 else
746 i_rewind = 0;
749 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
751 p_pic_start = p_pic;
752 p_buffer = b_hscale ? p_buffer_start : p_pic;
754 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
756 MMX_CALL (
757 MMX_INIT_16
758 MMX_YUV_MUL
759 MMX_YUV_ADD
760 MMX_UNPACK_16
/* 8 pixels consumed: 8 luma samples, 4 samples from each chroma plane */
762 p_y += 8;
763 p_u += 4;
764 p_v += 4;
765 p_buffer += 8;
768 /* Here we do some unaligned reads and duplicate conversions, but
769 * at least we have all the pixels */
770 if( i_rewind )
772 p_y -= i_rewind;
773 p_u -= i_rewind >> 1;
774 p_v -= i_rewind >> 1;
775 p_buffer -= i_rewind;
777 MMX_CALL (
778 MMX_INIT_16
779 MMX_YUV_MUL
780 MMX_YUV_ADD
781 MMX_UNPACK_16
783 p_y += 8;
784 p_u += 4;
785 p_v += 4;
786 p_buffer += 8;
788 SCALE_WIDTH;
789 SCALE_HEIGHT( 420, 2 );
791 p_y += i_source_margin;
792 if( i_y % 2 )
794 p_u += i_source_margin_c;
795 p_v += i_source_margin_c;
798 /* re-enable FPU registers */
799 MMX_END;
801 #endif
804 #endif
806 /*****************************************************************************
807 * I420_RGB32: color YUV 4:2:0 to RGB 32 bpp
808 *****************************************************************************
809 * Horizontal alignment needed:
810 * - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
811 * - output: 1 pixel (4 bytes), margins allowed
812 * Vertical alignment needed:
813 * - input: 2 lines (2 Y lines, 1 U/V line)
814 * - output: 1 line
815 *****************************************************************************/
817 #if defined (MODULE_NAME_IS_i420_rgb)
/* Convert the Y/U/V planes of p_src into 32-bit pixels written through
 * p_dest->p->p_pixels, one source line per outer-loop iteration.
 *
 * p_filter supplies the input/output dimensions and, through p_sys, the
 * p_rgb32 lookup table, the scratch line buffer (p_buffer) and the offset
 * array (p_offset). Nothing is returned.
 *
 * NOTE(review): CONVERT_YUV_PIXEL, CONVERT_Y_PIXEL, SCALE_WIDTH and
 * SCALE_HEIGHT are defined outside this section and appear to use the
 * locals below by name -- confirm before renaming or reordering. */
819 void I420_RGB32( filter_t *p_filter, picture_t *p_src,
820 picture_t *p_dest )
822 /* We got this one from the old arguments */
823 uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
824 uint8_t *p_y = p_src->Y_PIXELS;
825 uint8_t *p_u = p_src->U_PIXELS;
826 uint8_t *p_v = p_src->V_PIXELS;
828 bool b_hscale; /* horizontal scaling type */
829 unsigned int i_vscale; /* vertical scaling type */
830 unsigned int i_x, i_y; /* horizontal and vertical indexes */
832 int i_right_margin;
833 int i_rewind;
834 int i_scale_count; /* scale modulo counter */
835 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
836 uint32_t * p_pic_start; /* beginning of the current line for copy */
837 int i_uval, i_vval; /* U and V samples */
838 int i_red, i_green, i_blue; /* U and V modified samples */
839 uint32_t * p_yuv = p_filter->p_sys->p_rgb32;
840 uint32_t * p_ybase; /* Y dependent conversion table */
842 /* Conversion buffer pointer */
843 uint32_t * p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
844 uint32_t * p_buffer;
846 /* Offset array pointer */
847 int * p_offset_start = p_filter->p_sys->p_offset;
848 int * p_offset;
/* Bytes of padding after the visible part of a luma / chroma source line */
850 const int i_source_margin = p_src->p[0].i_pitch
851 - p_src->p[0].i_visible_pitch;
852 const int i_source_margin_c = p_src->p[1].i_pitch
853 - p_src->p[1].i_visible_pitch;
/* Bytes of padding after the visible part of a destination line */
855 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
/* When the width is not a multiple of 8, i_rewind is how many pixels the
 * final 8-pixel group has to step back so that it ends on the last pixel */
857 if( p_filter->fmt_in.video.i_width & 7 )
859 i_rewind = 8 - ( p_filter->fmt_in.video.i_width & 7 );
861 else
863 i_rewind = 0;
866 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
867 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
868 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
869 SetOffset( p_filter->fmt_in.video.i_width,
870 p_filter->fmt_in.video.i_height,
871 p_filter->fmt_out.video.i_width,
872 p_filter->fmt_out.video.i_height,
873 &b_hscale, &i_vscale, p_offset_start );
876 * Perform conversion
878 i_scale_count = ( i_vscale == 1 ) ?
879 p_filter->fmt_out.video.i_height :
880 p_filter->fmt_in.video.i_height;
881 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
883 p_pic_start = p_pic;
/* With horizontal scaling the line is converted into the scratch buffer,
 * otherwise straight into the destination line */
884 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* One iteration per full group of 8 pixels (four YUV/Y macro pairs) */
886 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
888 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
889 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
890 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
891 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
894 /* Here we do some unaligned reads and duplicate conversions, but
895 * at least we have all the pixels */
896 if( i_rewind )
/* Step back i_rewind luma samples / output pixels and half as many chroma
 * samples, then convert one more overlapping group of 8 */
898 p_y -= i_rewind;
899 p_u -= i_rewind >> 1;
900 p_v -= i_rewind >> 1;
901 p_buffer -= i_rewind;
902 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
903 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
904 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
905 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
907 SCALE_WIDTH;
908 SCALE_HEIGHT( 420, 4 );
/* Skip the luma padding after every line; the chroma padding is skipped
 * only after odd-numbered lines (i_y % 2) */
910 p_y += i_source_margin;
911 if( i_y % 2 )
913 p_u += i_source_margin_c;
914 p_v += i_source_margin_c;
919 #else // defined (MODULE_NAME_IS_i420_rgb_mmx) || defined (MODULE_NAME_IS_i420_rgb_sse2)
921 void I420_A8R8G8B8( filter_t *p_filter, picture_t *p_src,
922 picture_t *p_dest )
924 /* We got this one from the old arguments */
925 uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
926 uint8_t *p_y = p_src->Y_PIXELS;
927 uint8_t *p_u = p_src->U_PIXELS;
928 uint8_t *p_v = p_src->V_PIXELS;
930 bool b_hscale; /* horizontal scaling type */
931 unsigned int i_vscale; /* vertical scaling type */
932 unsigned int i_x, i_y; /* horizontal and vertical indexes */
934 int i_right_margin;
935 int i_rewind;
936 int i_scale_count; /* scale modulo counter */
937 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
938 uint32_t * p_pic_start; /* beginning of the current line for copy */
939 /* Conversion buffer pointer */
940 uint32_t * p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
941 uint32_t * p_buffer;
943 /* Offset array pointer */
944 int * p_offset_start = p_filter->p_sys->p_offset;
945 int * p_offset;
947 const int i_source_margin = p_src->p[0].i_pitch
948 - p_src->p[0].i_visible_pitch;
949 const int i_source_margin_c = p_src->p[1].i_pitch
950 - p_src->p[1].i_visible_pitch;
952 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
954 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
955 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
956 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
957 SetOffset( p_filter->fmt_in.video.i_width,
958 p_filter->fmt_in.video.i_height,
959 p_filter->fmt_out.video.i_width,
960 p_filter->fmt_out.video.i_height,
961 &b_hscale, &i_vscale, p_offset_start );
964 * Perform conversion
966 i_scale_count = ( i_vscale == 1 ) ?
967 p_filter->fmt_out.video.i_height :
968 p_filter->fmt_in.video.i_height;
970 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
972 if( p_filter->fmt_in.video.i_width & 15 )
974 i_rewind = 16 - ( p_filter->fmt_in.video.i_width & 15 );
976 else
978 i_rewind = 0;
982 ** SSE2 128 bits fetch/store instructions are faster
983 ** if memory access is 16 bytes aligned
986 p_buffer = b_hscale ? p_buffer_start : p_pic;
987 if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
988 p_dest->p->i_pitch|
989 ((intptr_t)p_y)|
990 ((intptr_t)p_buffer))) )
992 /* use faster SSE2 aligned fetch and store */
993 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
995 p_pic_start = p_pic;
997 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
999 SSE2_CALL (
1000 SSE2_INIT_32_ALIGNED
1001 SSE2_YUV_MUL
1002 SSE2_YUV_ADD
1003 SSE2_UNPACK_32_ARGB_ALIGNED
1005 p_y += 16;
1006 p_u += 8;
1007 p_v += 8;
1008 p_buffer += 16;
1011 /* Here we do some unaligned reads and duplicate conversions, but
1012 * at least we have all the pixels */
1013 if( i_rewind )
1015 p_y -= i_rewind;
1016 p_u -= i_rewind >> 1;
1017 p_v -= i_rewind >> 1;
1018 p_buffer -= i_rewind;
1019 SSE2_CALL (
1020 SSE2_INIT_32_UNALIGNED
1021 SSE2_YUV_MUL
1022 SSE2_YUV_ADD
1023 SSE2_UNPACK_32_ARGB_UNALIGNED
1025 p_y += 16;
1026 p_u += 4;
1027 p_v += 4;
1029 SCALE_WIDTH;
1030 SCALE_HEIGHT( 420, 4 );
1032 p_y += i_source_margin;
1033 if( i_y % 2 )
1035 p_u += i_source_margin_c;
1036 p_v += i_source_margin_c;
1038 p_buffer = b_hscale ? p_buffer_start : p_pic;
1041 else
1043 /* use slower SSE2 unaligned fetch and store */
1044 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1046 p_pic_start = p_pic;
1047 p_buffer = b_hscale ? p_buffer_start : p_pic;
1049 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
1051 SSE2_CALL (
1052 SSE2_INIT_32_UNALIGNED
1053 SSE2_YUV_MUL
1054 SSE2_YUV_ADD
1055 SSE2_UNPACK_32_ARGB_UNALIGNED
1057 p_y += 16;
1058 p_u += 8;
1059 p_v += 8;
1060 p_buffer += 16;
1063 /* Here we do some unaligned reads and duplicate conversions, but
1064 * at least we have all the pixels */
1065 if( i_rewind )
1067 p_y -= i_rewind;
1068 p_u -= i_rewind >> 1;
1069 p_v -= i_rewind >> 1;
1070 p_buffer -= i_rewind;
1071 SSE2_CALL (
1072 SSE2_INIT_32_UNALIGNED
1073 SSE2_YUV_MUL
1074 SSE2_YUV_ADD
1075 SSE2_UNPACK_32_ARGB_UNALIGNED
1077 p_y += 16;
1078 p_u += 8;
1079 p_v += 8;
1081 SCALE_WIDTH;
1082 SCALE_HEIGHT( 420, 4 );
1084 p_y += i_source_margin;
1085 if( i_y % 2 )
1087 p_u += i_source_margin_c;
1088 p_v += i_source_margin_c;
1090 p_buffer = b_hscale ? p_buffer_start : p_pic;
1094 /* make sure all SSE2 stores are visible thereafter */
1095 SSE2_END;
1097 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
1099 if( p_filter->fmt_in.video.i_width & 7 )
1101 i_rewind = 8 - ( p_filter->fmt_in.video.i_width & 7 );
1103 else
1105 i_rewind = 0;
1108 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1110 p_pic_start = p_pic;
1111 p_buffer = b_hscale ? p_buffer_start : p_pic;
1113 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
1115 MMX_CALL (
1116 MMX_INIT_32
1117 MMX_YUV_MUL
1118 MMX_YUV_ADD
1119 MMX_UNPACK_32_ARGB
1121 p_y += 8;
1122 p_u += 4;
1123 p_v += 4;
1124 p_buffer += 8;
1127 /* Here we do some unaligned reads and duplicate conversions, but
1128 * at least we have all the pixels */
1129 if( i_rewind )
1131 p_y -= i_rewind;
1132 p_u -= i_rewind >> 1;
1133 p_v -= i_rewind >> 1;
1134 p_buffer -= i_rewind;
1135 MMX_CALL (
1136 MMX_INIT_32
1137 MMX_YUV_MUL
1138 MMX_YUV_ADD
1139 MMX_UNPACK_32_ARGB
1141 p_y += 8;
1142 p_u += 4;
1143 p_v += 4;
1144 p_buffer += 8;
1146 SCALE_WIDTH;
1147 SCALE_HEIGHT( 420, 4 );
1149 p_y += i_source_margin;
1150 if( i_y % 2 )
1152 p_u += i_source_margin_c;
1153 p_v += i_source_margin_c;
1157 /* re-enable FPU registers */
1158 MMX_END;
1160 #endif
1163 void I420_R8G8B8A8( filter_t *p_filter, picture_t *p_src,
1164 picture_t *p_dest )
1166 /* We got this one from the old arguments */
1167 uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
1168 uint8_t *p_y = p_src->Y_PIXELS;
1169 uint8_t *p_u = p_src->U_PIXELS;
1170 uint8_t *p_v = p_src->V_PIXELS;
1172 bool b_hscale; /* horizontal scaling type */
1173 unsigned int i_vscale; /* vertical scaling type */
1174 unsigned int i_x, i_y; /* horizontal and vertical indexes */
1176 int i_right_margin;
1177 int i_rewind;
1178 int i_scale_count; /* scale modulo counter */
1179 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
1180 uint32_t * p_pic_start; /* beginning of the current line for copy */
1181 /* Conversion buffer pointer */
1182 uint32_t * p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
1183 uint32_t * p_buffer;
1185 /* Offset array pointer */
1186 int * p_offset_start = p_filter->p_sys->p_offset;
1187 int * p_offset;
1189 const int i_source_margin = p_src->p[0].i_pitch
1190 - p_src->p[0].i_visible_pitch;
1191 const int i_source_margin_c = p_src->p[1].i_pitch
1192 - p_src->p[1].i_visible_pitch;
1194 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
1196 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
1197 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
1198 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
1199 SetOffset( p_filter->fmt_in.video.i_width,
1200 p_filter->fmt_in.video.i_height,
1201 p_filter->fmt_out.video.i_width,
1202 p_filter->fmt_out.video.i_height,
1203 &b_hscale, &i_vscale, p_offset_start );
1206 * Perform conversion
1208 i_scale_count = ( i_vscale == 1 ) ?
1209 p_filter->fmt_out.video.i_height :
1210 p_filter->fmt_in.video.i_height;
1212 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
1214 if( p_filter->fmt_in.video.i_width & 15 )
1216 i_rewind = 16 - ( p_filter->fmt_in.video.i_width & 15 );
1218 else
1220 i_rewind = 0;
1224 ** SSE2 128 bits fetch/store instructions are faster
1225 ** if memory access is 16 bytes aligned
1228 p_buffer = b_hscale ? p_buffer_start : p_pic;
1229 if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
1230 p_dest->p->i_pitch|
1231 ((intptr_t)p_y)|
1232 ((intptr_t)p_buffer))) )
1234 /* use faster SSE2 aligned fetch and store */
1235 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1237 p_pic_start = p_pic;
1239 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
1241 SSE2_CALL (
1242 SSE2_INIT_32_ALIGNED
1243 SSE2_YUV_MUL
1244 SSE2_YUV_ADD
1245 SSE2_UNPACK_32_RGBA_ALIGNED
1247 p_y += 16;
1248 p_u += 8;
1249 p_v += 8;
1250 p_buffer += 16;
1253 /* Here we do some unaligned reads and duplicate conversions, but
1254 * at least we have all the pixels */
1255 if( i_rewind )
1257 p_y -= i_rewind;
1258 p_u -= i_rewind >> 1;
1259 p_v -= i_rewind >> 1;
1260 p_buffer -= i_rewind;
1261 SSE2_CALL (
1262 SSE2_INIT_32_UNALIGNED
1263 SSE2_YUV_MUL
1264 SSE2_YUV_ADD
1265 SSE2_UNPACK_32_RGBA_UNALIGNED
1267 p_y += 16;
1268 p_u += 4;
1269 p_v += 4;
1271 SCALE_WIDTH;
1272 SCALE_HEIGHT( 420, 4 );
1274 p_y += i_source_margin;
1275 if( i_y % 2 )
1277 p_u += i_source_margin_c;
1278 p_v += i_source_margin_c;
1280 p_buffer = b_hscale ? p_buffer_start : p_pic;
1283 else
1285 /* use slower SSE2 unaligned fetch and store */
1286 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1288 p_pic_start = p_pic;
1289 p_buffer = b_hscale ? p_buffer_start : p_pic;
1291 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
1293 SSE2_CALL (
1294 SSE2_INIT_32_UNALIGNED
1295 SSE2_YUV_MUL
1296 SSE2_YUV_ADD
1297 SSE2_UNPACK_32_RGBA_UNALIGNED
1299 p_y += 16;
1300 p_u += 8;
1301 p_v += 8;
1302 p_buffer += 16;
1305 /* Here we do some unaligned reads and duplicate conversions, but
1306 * at least we have all the pixels */
1307 if( i_rewind )
1309 p_y -= i_rewind;
1310 p_u -= i_rewind >> 1;
1311 p_v -= i_rewind >> 1;
1312 p_buffer -= i_rewind;
1313 SSE2_CALL (
1314 SSE2_INIT_32_UNALIGNED
1315 SSE2_YUV_MUL
1316 SSE2_YUV_ADD
1317 SSE2_UNPACK_32_RGBA_UNALIGNED
1319 p_y += 16;
1320 p_u += 8;
1321 p_v += 8;
1323 SCALE_WIDTH;
1324 SCALE_HEIGHT( 420, 4 );
1326 p_y += i_source_margin;
1327 if( i_y % 2 )
1329 p_u += i_source_margin_c;
1330 p_v += i_source_margin_c;
1332 p_buffer = b_hscale ? p_buffer_start : p_pic;
1336 /* make sure all SSE2 stores are visible thereafter */
1337 SSE2_END;
1339 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
1341 if( p_filter->fmt_in.video.i_width & 7 )
1343 i_rewind = 8 - ( p_filter->fmt_in.video.i_width & 7 );
1345 else
1347 i_rewind = 0;
1350 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1352 p_pic_start = p_pic;
1353 p_buffer = b_hscale ? p_buffer_start : p_pic;
1355 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
1357 MMX_CALL (
1358 MMX_INIT_32
1359 MMX_YUV_MUL
1360 MMX_YUV_ADD
1361 MMX_UNPACK_32_RGBA
1363 p_y += 8;
1364 p_u += 4;
1365 p_v += 4;
1366 p_buffer += 8;
1369 /* Here we do some unaligned reads and duplicate conversions, but
1370 * at least we have all the pixels */
1371 if( i_rewind )
1373 p_y -= i_rewind;
1374 p_u -= i_rewind >> 1;
1375 p_v -= i_rewind >> 1;
1376 p_buffer -= i_rewind;
1377 MMX_CALL (
1378 MMX_INIT_32
1379 MMX_YUV_MUL
1380 MMX_YUV_ADD
1381 MMX_UNPACK_32_RGBA
1383 p_y += 8;
1384 p_u += 4;
1385 p_v += 4;
1386 p_buffer += 8;
1388 SCALE_WIDTH;
1389 SCALE_HEIGHT( 420, 4 );
1391 p_y += i_source_margin;
1392 if( i_y % 2 )
1394 p_u += i_source_margin_c;
1395 p_v += i_source_margin_c;
1399 /* re-enable FPU registers */
1400 MMX_END;
1402 #endif
1405 void I420_B8G8R8A8( filter_t *p_filter, picture_t *p_src,
1406 picture_t *p_dest )
1408 /* We got this one from the old arguments */
1409 uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
1410 uint8_t *p_y = p_src->Y_PIXELS;
1411 uint8_t *p_u = p_src->U_PIXELS;
1412 uint8_t *p_v = p_src->V_PIXELS;
1414 bool b_hscale; /* horizontal scaling type */
1415 unsigned int i_vscale; /* vertical scaling type */
1416 unsigned int i_x, i_y; /* horizontal and vertical indexes */
1418 int i_right_margin;
1419 int i_rewind;
1420 int i_scale_count; /* scale modulo counter */
1421 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
1422 uint32_t * p_pic_start; /* beginning of the current line for copy */
1423 /* Conversion buffer pointer */
1424 uint32_t * p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
1425 uint32_t * p_buffer;
1427 /* Offset array pointer */
1428 int * p_offset_start = p_filter->p_sys->p_offset;
1429 int * p_offset;
1431 const int i_source_margin = p_src->p[0].i_pitch
1432 - p_src->p[0].i_visible_pitch;
1433 const int i_source_margin_c = p_src->p[1].i_pitch
1434 - p_src->p[1].i_visible_pitch;
1436 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
1438 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
1439 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
1440 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
1441 SetOffset( p_filter->fmt_in.video.i_width,
1442 p_filter->fmt_in.video.i_height,
1443 p_filter->fmt_out.video.i_width,
1444 p_filter->fmt_out.video.i_height,
1445 &b_hscale, &i_vscale, p_offset_start );
1448 * Perform conversion
1450 i_scale_count = ( i_vscale == 1 ) ?
1451 p_filter->fmt_out.video.i_height :
1452 p_filter->fmt_in.video.i_height;
1454 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
1456 if( p_filter->fmt_in.video.i_width & 15 )
1458 i_rewind = 16 - ( p_filter->fmt_in.video.i_width & 15 );
1460 else
1462 i_rewind = 0;
1466 ** SSE2 128 bits fetch/store instructions are faster
1467 ** if memory access is 16 bytes aligned
1470 p_buffer = b_hscale ? p_buffer_start : p_pic;
1471 if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
1472 p_dest->p->i_pitch|
1473 ((intptr_t)p_y)|
1474 ((intptr_t)p_buffer))) )
1476 /* use faster SSE2 aligned fetch and store */
1477 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1479 p_pic_start = p_pic;
1481 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
1483 SSE2_CALL (
1484 SSE2_INIT_32_ALIGNED
1485 SSE2_YUV_MUL
1486 SSE2_YUV_ADD
1487 SSE2_UNPACK_32_BGRA_ALIGNED
1489 p_y += 16;
1490 p_u += 8;
1491 p_v += 8;
1492 p_buffer += 16;
1495 /* Here we do some unaligned reads and duplicate conversions, but
1496 * at least we have all the pixels */
1497 if( i_rewind )
1499 p_y -= i_rewind;
1500 p_u -= i_rewind >> 1;
1501 p_v -= i_rewind >> 1;
1502 p_buffer -= i_rewind;
1503 SSE2_CALL (
1504 SSE2_INIT_32_UNALIGNED
1505 SSE2_YUV_MUL
1506 SSE2_YUV_ADD
1507 SSE2_UNPACK_32_BGRA_UNALIGNED
1509 p_y += 16;
1510 p_u += 4;
1511 p_v += 4;
1513 SCALE_WIDTH;
1514 SCALE_HEIGHT( 420, 4 );
1516 p_y += i_source_margin;
1517 if( i_y % 2 )
1519 p_u += i_source_margin_c;
1520 p_v += i_source_margin_c;
1522 p_buffer = b_hscale ? p_buffer_start : p_pic;
1525 else
1527 /* use slower SSE2 unaligned fetch and store */
1528 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1530 p_pic_start = p_pic;
1531 p_buffer = b_hscale ? p_buffer_start : p_pic;
1533 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
1535 SSE2_CALL (
1536 SSE2_INIT_32_UNALIGNED
1537 SSE2_YUV_MUL
1538 SSE2_YUV_ADD
1539 SSE2_UNPACK_32_BGRA_UNALIGNED
1541 p_y += 16;
1542 p_u += 8;
1543 p_v += 8;
1544 p_buffer += 16;
1547 /* Here we do some unaligned reads and duplicate conversions, but
1548 * at least we have all the pixels */
1549 if( i_rewind )
1551 p_y -= i_rewind;
1552 p_u -= i_rewind >> 1;
1553 p_v -= i_rewind >> 1;
1554 p_buffer -= i_rewind;
1555 SSE2_CALL (
1556 SSE2_INIT_32_UNALIGNED
1557 SSE2_YUV_MUL
1558 SSE2_YUV_ADD
1559 SSE2_UNPACK_32_BGRA_UNALIGNED
1561 p_y += 16;
1562 p_u += 8;
1563 p_v += 8;
1565 SCALE_WIDTH;
1566 SCALE_HEIGHT( 420, 4 );
1568 p_y += i_source_margin;
1569 if( i_y % 2 )
1571 p_u += i_source_margin_c;
1572 p_v += i_source_margin_c;
1574 p_buffer = b_hscale ? p_buffer_start : p_pic;
1578 #else
1580 if( p_filter->fmt_in.video.i_width & 7 )
1582 i_rewind = 8 - ( p_filter->fmt_in.video.i_width & 7 );
1584 else
1586 i_rewind = 0;
1589 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1591 p_pic_start = p_pic;
1592 p_buffer = b_hscale ? p_buffer_start : p_pic;
1594 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
1596 MMX_CALL (
1597 MMX_INIT_32
1598 MMX_YUV_MUL
1599 MMX_YUV_ADD
1600 MMX_UNPACK_32_BGRA
1602 p_y += 8;
1603 p_u += 4;
1604 p_v += 4;
1605 p_buffer += 8;
1608 /* Here we do some unaligned reads and duplicate conversions, but
1609 * at least we have all the pixels */
1610 if( i_rewind )
1612 p_y -= i_rewind;
1613 p_u -= i_rewind >> 1;
1614 p_v -= i_rewind >> 1;
1615 p_buffer -= i_rewind;
1616 MMX_CALL (
1617 MMX_INIT_32
1618 MMX_YUV_MUL
1619 MMX_YUV_ADD
1620 MMX_UNPACK_32_BGRA
1622 p_y += 8;
1623 p_u += 4;
1624 p_v += 4;
1625 p_buffer += 8;
1627 SCALE_WIDTH;
1628 SCALE_HEIGHT( 420, 4 );
1630 p_y += i_source_margin;
1631 if( i_y % 2 )
1633 p_u += i_source_margin_c;
1634 p_v += i_source_margin_c;
1638 /* re-enable FPU registers */
1639 MMX_END;
1641 #endif
1644 void I420_A8B8G8R8( filter_t *p_filter, picture_t *p_src,
1645 picture_t *p_dest )
1647 /* We got this one from the old arguments */
1648 uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
1649 uint8_t *p_y = p_src->Y_PIXELS;
1650 uint8_t *p_u = p_src->U_PIXELS;
1651 uint8_t *p_v = p_src->V_PIXELS;
1653 bool b_hscale; /* horizontal scaling type */
1654 unsigned int i_vscale; /* vertical scaling type */
1655 unsigned int i_x, i_y; /* horizontal and vertical indexes */
1657 int i_right_margin;
1658 int i_rewind;
1659 int i_scale_count; /* scale modulo counter */
1660 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
1661 uint32_t * p_pic_start; /* beginning of the current line for copy */
1662 /* Conversion buffer pointer */
1663 uint32_t * p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
1664 uint32_t * p_buffer;
1666 /* Offset array pointer */
1667 int * p_offset_start = p_filter->p_sys->p_offset;
1668 int * p_offset;
1670 const int i_source_margin = p_src->p[0].i_pitch
1671 - p_src->p[0].i_visible_pitch;
1672 const int i_source_margin_c = p_src->p[1].i_pitch
1673 - p_src->p[1].i_visible_pitch;
1675 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
1677 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
1678 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
1679 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
1680 SetOffset( p_filter->fmt_in.video.i_width,
1681 p_filter->fmt_in.video.i_height,
1682 p_filter->fmt_out.video.i_width,
1683 p_filter->fmt_out.video.i_height,
1684 &b_hscale, &i_vscale, p_offset_start );
1687 * Perform conversion
1689 i_scale_count = ( i_vscale == 1 ) ?
1690 p_filter->fmt_out.video.i_height :
1691 p_filter->fmt_in.video.i_height;
1693 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
1695 if( p_filter->fmt_in.video.i_width & 15 )
1697 i_rewind = 16 - ( p_filter->fmt_in.video.i_width & 15 );
1699 else
1701 i_rewind = 0;
1705 ** SSE2 128 bits fetch/store instructions are faster
1706 ** if memory access is 16 bytes aligned
1709 p_buffer = b_hscale ? p_buffer_start : p_pic;
1710 if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
1711 p_dest->p->i_pitch|
1712 ((intptr_t)p_y)|
1713 ((intptr_t)p_buffer))) )
1715 /* use faster SSE2 aligned fetch and store */
1716 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1718 p_pic_start = p_pic;
1720 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
1722 SSE2_CALL (
1723 SSE2_INIT_32_ALIGNED
1724 SSE2_YUV_MUL
1725 SSE2_YUV_ADD
1726 SSE2_UNPACK_32_ABGR_ALIGNED
1728 p_y += 16;
1729 p_u += 8;
1730 p_v += 8;
1731 p_buffer += 16;
1734 /* Here we do some unaligned reads and duplicate conversions, but
1735 * at least we have all the pixels */
1736 if( i_rewind )
1738 p_y -= i_rewind;
1739 p_u -= i_rewind >> 1;
1740 p_v -= i_rewind >> 1;
1741 p_buffer -= i_rewind;
1742 SSE2_CALL (
1743 SSE2_INIT_32_UNALIGNED
1744 SSE2_YUV_MUL
1745 SSE2_YUV_ADD
1746 SSE2_UNPACK_32_ABGR_UNALIGNED
1748 p_y += 16;
1749 p_u += 4;
1750 p_v += 4;
1752 SCALE_WIDTH;
1753 SCALE_HEIGHT( 420, 4 );
1755 p_y += i_source_margin;
1756 if( i_y % 2 )
1758 p_u += i_source_margin_c;
1759 p_v += i_source_margin_c;
1761 p_buffer = b_hscale ? p_buffer_start : p_pic;
1764 else
1766 /* use slower SSE2 unaligned fetch and store */
1767 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1769 p_pic_start = p_pic;
1770 p_buffer = b_hscale ? p_buffer_start : p_pic;
1772 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
1774 SSE2_CALL (
1775 SSE2_INIT_32_UNALIGNED
1776 SSE2_YUV_MUL
1777 SSE2_YUV_ADD
1778 SSE2_UNPACK_32_ABGR_UNALIGNED
1780 p_y += 16;
1781 p_u += 8;
1782 p_v += 8;
1783 p_buffer += 16;
1786 /* Here we do some unaligned reads and duplicate conversions, but
1787 * at least we have all the pixels */
1788 if( i_rewind )
1790 p_y -= i_rewind;
1791 p_u -= i_rewind >> 1;
1792 p_v -= i_rewind >> 1;
1793 p_buffer -= i_rewind;
1794 SSE2_CALL (
1795 SSE2_INIT_32_UNALIGNED
1796 SSE2_YUV_MUL
1797 SSE2_YUV_ADD
1798 SSE2_UNPACK_32_ABGR_UNALIGNED
1800 p_y += 16;
1801 p_u += 8;
1802 p_v += 8;
1804 SCALE_WIDTH;
1805 SCALE_HEIGHT( 420, 4 );
1807 p_y += i_source_margin;
1808 if( i_y % 2 )
1810 p_u += i_source_margin_c;
1811 p_v += i_source_margin_c;
1813 p_buffer = b_hscale ? p_buffer_start : p_pic;
1817 #else
1819 if( p_filter->fmt_in.video.i_width & 7 )
1821 i_rewind = 8 - ( p_filter->fmt_in.video.i_width & 7 );
1823 else
1825 i_rewind = 0;
1828 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1830 p_pic_start = p_pic;
1831 p_buffer = b_hscale ? p_buffer_start : p_pic;
1833 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
1835 MMX_CALL (
1836 MMX_INIT_32
1837 MMX_YUV_MUL
1838 MMX_YUV_ADD
1839 MMX_UNPACK_32_ABGR
1841 p_y += 8;
1842 p_u += 4;
1843 p_v += 4;
1844 p_buffer += 8;
1847 /* Here we do some unaligned reads and duplicate conversions, but
1848 * at least we have all the pixels */
1849 if( i_rewind )
1851 p_y -= i_rewind;
1852 p_u -= i_rewind >> 1;
1853 p_v -= i_rewind >> 1;
1854 p_buffer -= i_rewind;
1855 MMX_CALL (
1856 MMX_INIT_32
1857 MMX_YUV_MUL
1858 MMX_YUV_ADD
1859 MMX_UNPACK_32_ABGR
1861 p_y += 8;
1862 p_u += 4;
1863 p_v += 4;
1864 p_buffer += 8;
1866 SCALE_WIDTH;
1867 SCALE_HEIGHT( 420, 4 );
1869 p_y += i_source_margin;
1870 if( i_y % 2 )
1872 p_u += i_source_margin_c;
1873 p_v += i_source_margin_c;
1877 /* re-enable FPU registers */
1878 MMX_END;
1880 #endif
1883 #endif
1885 /* Following functions are local */
/*****************************************************************************
 * SetOffset: build offset array for conversion functions
 *****************************************************************************
 * This function will build an offset array used in later conversion functions.
 * It will also set horizontal and vertical scaling indicators.
 *
 * i_width, i_height:         source dimensions
 * i_pic_width, i_pic_height: destination dimensions
 * pb_hscale: set to false when both widths are equal, true otherwise
 * pi_vscale: set to 0 when both heights are equal, 1 when the destination is
 *            taller, and (unsigned int)-1 when it is shorter
 * p_offset:  receives the horizontal offset table; left untouched when no
 *            horizontal scaling is needed. When widening, entries are 0 or 1;
 *            when narrowing, one entry >= 1 is written per destination pixel
 *            (presumably the source step taken after each output pixel -
 *            confirm against the SCALE_WIDTH macro).
 *
 * Non-positive widths produce an empty table instead of running past the end
 * of p_offset.
 *****************************************************************************/
static void SetOffset( int i_width, int i_height, int i_pic_width,
                       int i_pic_height, bool *pb_hscale,
                       unsigned int *pi_vscale, int *p_offset )
{
    int i_x;                                    /* x position in destination */
    int i_scale_count;                                     /* modulo counter */

    /*
     * Prepare horizontal offset array
     */
    if( i_pic_width - i_width == 0 )
    {
        /* No horizontal scaling: YUV conversion is done directly to picture */
        *pb_hscale = false;
    }
    else if( i_pic_width - i_width > 0 )
    {
        /* Prepare scaling array for horizontal extension */
        *pb_hscale = true;
        i_scale_count = i_pic_width;
        /* "i_x > 0" rather than "i_x--" so a negative width cannot loop */
        for( i_x = i_width; i_x > 0; i_x-- )
        {
            while( (i_scale_count -= i_width) > 0 )
            {
                *p_offset++ = 0;
            }
            *p_offset++ = 1;
            i_scale_count += i_pic_width;
        }
    }
    else /* if( i_pic_width - i_width < 0 ) */
    {
        /* Prepare scaling array for horizontal reduction */
        *pb_hscale = true;
        i_scale_count = i_width;
        /* "i_x > 0" rather than "i_x--" so a negative width cannot loop */
        for( i_x = i_pic_width; i_x > 0; i_x-- )
        {
            *p_offset = 1;
            while( (i_scale_count -= i_pic_width) > 0 )
            {
                *p_offset += 1;
            }
            p_offset++;
            i_scale_count += i_width;
        }
    }

    /*
     * Set vertical scaling indicator
     */
    if( i_pic_height - i_height == 0 )
    {
        *pi_vscale = 0;
    }
    else if( i_pic_height - i_height > 0 )
    {
        *pi_vscale = 1;
    }
    else /* if( i_pic_height - i_height < 0 ) */
    {
        /* same value the implicit int -> unsigned conversion of -1 yields */
        *pi_vscale = (unsigned int)-1;
    }
}