Clarify live555 version error
[vlc/asuraparaju-public.git] / modules / video_chroma / i420_rgb16.c
blobc60b15b9536c039808d5b107e2836b178840dfc6
1 /*****************************************************************************
2 * i420_rgb16.c : YUV to bitmap RGB conversion module for vlc
3 *****************************************************************************
4 * Copyright (C) 2000 the VideoLAN team
5 * $Id$
7 * Authors: Samuel Hocevar <sam@zoy.org>
8 * Damien Fouilleul <damienf@videolan.org>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
23 *****************************************************************************/
25 /*****************************************************************************
26 * Preamble
27 *****************************************************************************/
29 #ifdef HAVE_CONFIG_H
30 # include "config.h"
31 #endif
33 #include <vlc_common.h>
34 #include <vlc_filter.h>
36 #include "i420_rgb.h"
37 #if defined (MODULE_NAME_IS_i420_rgb)
38 # include "i420_rgb_c.h"
39 #elif defined (MODULE_NAME_IS_i420_rgb_mmx)
40 # include "../mmx/i420_rgb_mmx.h"
41 #elif defined (MODULE_NAME_IS_i420_rgb_sse2)
42 # include "../mmx/i420_rgb_mmx.h"
43 #endif
45 static void SetOffset( int, int, int, int, bool *,
46 unsigned int *, int * );
48 #if defined (MODULE_NAME_IS_i420_rgb)
49 /*****************************************************************************
50 * I420_RGB16_dither: color YUV 4:2:0 to RGB 16 bpp with dithering
51 *****************************************************************************
52 * Horizontal alignment needed:
53 * - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
54 * - output: 1 pixel (2 bytes), margins allowed
55 * Vertical alignment needed:
56 * - input: 2 lines (2 Y lines, 1 U/V line)
57 * - output: 1 line
58 *****************************************************************************/
/*
 * I420_RGB16_dither: convert the planar YUV 4:2:0 picture p_src into 16-bit
 * pixels written to p_dest, switching between four dither tables from one
 * pixel to the next.
 *
 * p_filter supplies the input/output dimensions (fmt_in / fmt_out), the
 * fmt_out.video.i_rrshift value that enters the dither table shift, and the
 * private lookup table, conversion buffer and offset array (p_sys).
 * Nothing is returned; the result is the content of p_dest.
 *
 * Several locals below (i_chroma_width, i_uval, i_vval, i_red, i_green,
 * i_blue, p_ybase, p_offset, p_pic_start, i_real_y, p_dither, ...) are never
 * read by the statements visible here. Presumably the CONVERT_*_DITHER,
 * SCALE_WIDTH and SCALE_HEIGHT macros from the included headers use them by
 * name, so they must not be renamed or removed -- TODO confirm against
 * i420_rgb.h / i420_rgb_c.h.
 */
59 void I420_RGB16_dither( filter_t *p_filter, picture_t *p_src,
60 picture_t *p_dest )
62 /* We got this one from the old arguments */
63 uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
64 uint8_t *p_y = p_src->Y_PIXELS;
65 uint8_t *p_u = p_src->U_PIXELS;
66 uint8_t *p_v = p_src->V_PIXELS;
68 bool b_hscale; /* horizontal scaling type */
69 unsigned int i_vscale; /* vertical scaling type */
70 unsigned int i_x, i_y; /* horizontal and vertical indexes */
71 unsigned int i_real_y; /* y % 4 */
73 int i_right_margin;
74 int i_rewind;
75 int i_scale_count; /* scale modulo counter */
76 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
77 uint16_t * p_pic_start; /* beginning of the current line for copy */
78 int i_uval, i_vval; /* U and V samples */
79 int i_red, i_green, i_blue; /* U and V modified samples */
80 uint16_t * p_yuv = p_filter->p_sys->p_rgb16;
81 uint16_t * p_ybase; /* Y dependent conversion table */
83 /* Conversion buffer pointer */
84 uint16_t * p_buffer_start = (uint16_t*)p_filter->p_sys->p_buffer;
85 uint16_t * p_buffer;
87 /* Offset array pointer */
88 int * p_offset_start = p_filter->p_sys->p_offset;
89 int * p_offset;
/* Bytes between the visible end of a source line and the start of the next
 * one, for the luma plane (p[0]) and for the first chroma plane (p[1]) */
91 const int i_source_margin = p_src->p[0].i_pitch
92 - p_src->p[0].i_visible_pitch;
93 const int i_source_margin_c = p_src->p[1].i_pitch
94 - p_src->p[1].i_visible_pitch;
96 /* The dithering matrices */
97 int dither10[4] = { 0x0, 0x8, 0x2, 0xa };
98 int dither11[4] = { 0xc, 0x4, 0xe, 0x6 };
99 int dither12[4] = { 0x3, 0xb, 0x1, 0x9 };
100 int dither13[4] = { 0xf, 0x7, 0xd, 0x5 };
/* Pre-shift every entry of the four tables by (SHIFT - 4 + i_rrshift);
 * i_x is only used as a table index here */
102 for(i_x = 0; i_x < 4; i_x++)
104 dither10[i_x] = dither10[i_x] << (SHIFT - 4 + p_filter->fmt_out.video.i_rrshift);
105 dither11[i_x] = dither11[i_x] << (SHIFT - 4 + p_filter->fmt_out.video.i_rrshift);
106 dither12[i_x] = dither12[i_x] << (SHIFT - 4 + p_filter->fmt_out.video.i_rrshift);
107 dither13[i_x] = dither13[i_x] << (SHIFT - 4 + p_filter->fmt_out.video.i_rrshift);
/* Destination line padding in bytes */
110 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
/* Number of pixels missing to reach the next multiple of 8;
 * 0 when the input width is already a multiple of 8 */
111 i_rewind = (-p_filter->fmt_in.video.i_width) & 7;
113 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
114 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
115 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
/* SetOffset fills b_hscale, i_vscale and the offset array from the
 * input and output dimensions */
116 SetOffset( p_filter->fmt_in.video.i_width,
117 p_filter->fmt_in.video.i_height,
118 p_filter->fmt_out.video.i_width,
119 p_filter->fmt_out.video.i_height,
120 &b_hscale, &i_vscale, p_offset_start );
123 * Perform conversion
/* The scale counter starts from the output height when i_vscale == 1 and
 * from the input height otherwise */
125 i_scale_count = ( i_vscale == 1 ) ?
126 p_filter->fmt_out.video.i_height :
127 p_filter->fmt_in.video.i_height;
/* One iteration per source line */
128 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
130 i_real_y = i_y & 0x3;
131 p_pic_start = p_pic;
/* Convert into the intermediate buffer when b_hscale is set, otherwise
 * straight into the destination picture */
132 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* Full groups of 8: eight macro invocations per iteration, cycling through
 * the four dither tables twice */
134 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
136 int *p_dither = dither10;
137 CONVERT_YUV_PIXEL_DITHER(2);
138 p_dither = dither11;
139 CONVERT_Y_PIXEL_DITHER(2);
140 p_dither = dither12;
141 CONVERT_YUV_PIXEL_DITHER(2);
142 p_dither = dither13;
143 CONVERT_Y_PIXEL_DITHER(2);
144 p_dither = dither10;
145 CONVERT_YUV_PIXEL_DITHER(2);
146 p_dither = dither11;
147 CONVERT_Y_PIXEL_DITHER(2);
148 p_dither = dither12;
149 CONVERT_YUV_PIXEL_DITHER(2);
150 p_dither = dither13;
151 CONVERT_Y_PIXEL_DITHER(2);
154 /* Here we do some unaligned reads and duplicate conversions, but
155 * at least we have all the pixels */
/* Back the luma/output pointers up by i_rewind and the chroma pointers by
 * half of that, then run one more full group of 8 */
156 if( i_rewind )
158 int *p_dither = dither10;
159 p_y -= i_rewind;
160 p_u -= i_rewind >> 1;
161 p_v -= i_rewind >> 1;
162 p_buffer -= i_rewind;
163 CONVERT_YUV_PIXEL_DITHER(2);
164 p_dither = dither11;
165 CONVERT_Y_PIXEL_DITHER(2);
166 p_dither = dither12;
167 CONVERT_YUV_PIXEL_DITHER(2);
168 p_dither = dither13;
169 CONVERT_Y_PIXEL_DITHER(2);
170 p_dither = dither10;
171 CONVERT_YUV_PIXEL_DITHER(2);
172 p_dither = dither11;
173 CONVERT_Y_PIXEL_DITHER(2);
174 p_dither = dither12;
175 CONVERT_YUV_PIXEL_DITHER(2);
176 p_dither = dither13;
177 CONVERT_Y_PIXEL_DITHER(2);
179 SCALE_WIDTH;
180 SCALE_HEIGHT( 420, 2 );
/* Skip the source padding: luma after every line, chroma only after odd
 * lines */
182 p_y += i_source_margin;
183 if( i_y % 2 )
185 p_u += i_source_margin_c;
186 p_v += i_source_margin_c;
190 #endif
192 /*****************************************************************************
193 * I420_RGB16: color YUV 4:2:0 to RGB 16 bpp
194 *****************************************************************************
195 * Horizontal alignment needed:
196 * - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
197 * - output: 1 pixel (2 bytes), margins allowed
198 * Vertical alignment needed:
199 * - input: 2 lines (2 Y lines, 1 U/V line)
200 * - output: 1 line
201 *****************************************************************************/
203 #if defined (MODULE_NAME_IS_i420_rgb)
/*
 * I420_RGB16: convert the planar YUV 4:2:0 picture p_src into 16-bit pixels
 * written to p_dest (plain C build, no dithering).
 *
 * p_filter supplies the input/output dimensions (fmt_in / fmt_out) and the
 * private lookup table, conversion buffer and offset array (p_sys).
 * Nothing is returned; the result is the content of p_dest.
 *
 * Several locals below (i_chroma_width, i_uval, i_vval, i_red, i_green,
 * i_blue, p_ybase, p_offset, p_pic_start, ...) are never read by the
 * statements visible here. Presumably the CONVERT_*, SCALE_WIDTH and
 * SCALE_HEIGHT macros from the included headers use them by name, so they
 * must not be renamed or removed -- TODO confirm against the headers.
 */
205 void I420_RGB16( filter_t *p_filter, picture_t *p_src,
206 picture_t *p_dest )
208 /* We got this one from the old arguments */
209 uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
210 uint8_t *p_y = p_src->Y_PIXELS;
211 uint8_t *p_u = p_src->U_PIXELS;
212 uint8_t *p_v = p_src->V_PIXELS;
214 bool b_hscale; /* horizontal scaling type */
215 unsigned int i_vscale; /* vertical scaling type */
216 unsigned int i_x, i_y; /* horizontal and vertical indexes */
218 int i_right_margin;
219 int i_rewind;
220 int i_scale_count; /* scale modulo counter */
221 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
222 uint16_t * p_pic_start; /* beginning of the current line for copy */
223 int i_uval, i_vval; /* U and V samples */
224 int i_red, i_green, i_blue; /* U and V modified samples */
225 uint16_t * p_yuv = p_filter->p_sys->p_rgb16;
226 uint16_t * p_ybase; /* Y dependent conversion table */
228 /* Conversion buffer pointer */
229 uint16_t * p_buffer_start = (uint16_t*)p_filter->p_sys->p_buffer;
230 uint16_t * p_buffer;
232 /* Offset array pointer */
233 int * p_offset_start = p_filter->p_sys->p_offset;
234 int * p_offset;
/* Bytes between the visible end of a source line and the start of the next
 * one, for the luma plane (p[0]) and for the first chroma plane (p[1]) */
236 const int i_source_margin = p_src->p[0].i_pitch
237 - p_src->p[0].i_visible_pitch;
238 const int i_source_margin_c = p_src->p[1].i_pitch
239 - p_src->p[1].i_visible_pitch;
/* Destination line padding in bytes */
241 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
/* Number of pixels missing to reach the next multiple of 8;
 * 0 when the input width is already a multiple of 8 */
242 i_rewind = (-p_filter->fmt_in.video.i_width) & 7;
244 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
245 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
246 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
/* SetOffset fills b_hscale, i_vscale and the offset array from the
 * input and output dimensions */
247 SetOffset( p_filter->fmt_in.video.i_width,
248 p_filter->fmt_in.video.i_height,
249 p_filter->fmt_out.video.i_width,
250 p_filter->fmt_out.video.i_height,
251 &b_hscale, &i_vscale, p_offset_start );
254 * Perform conversion
/* The scale counter starts from the output height when i_vscale == 1 and
 * from the input height otherwise */
256 i_scale_count = ( i_vscale == 1 ) ?
257 p_filter->fmt_out.video.i_height :
258 p_filter->fmt_in.video.i_height;
/* One iteration per source line */
259 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
261 p_pic_start = p_pic;
/* Convert into the intermediate buffer when b_hscale is set, otherwise
 * straight into the destination picture */
262 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* Full groups of 8: eight macro invocations per iteration */
264 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
266 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
267 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
268 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
269 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
272 /* Here we do some unaligned reads and duplicate conversions, but
273 * at least we have all the pixels */
/* Back the luma/output pointers up by i_rewind and the chroma pointers by
 * half of that, then run one more full group of 8 */
274 if( i_rewind )
276 p_y -= i_rewind;
277 p_u -= i_rewind >> 1;
278 p_v -= i_rewind >> 1;
279 p_buffer -= i_rewind;
281 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
282 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
283 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
284 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
286 SCALE_WIDTH;
287 SCALE_HEIGHT( 420, 2 );
/* Skip the source padding: luma after every line, chroma only after odd
 * lines */
289 p_y += i_source_margin;
290 if( i_y % 2 )
292 p_u += i_source_margin_c;
293 p_v += i_source_margin_c;
298 #else // ! defined (MODULE_NAME_IS_i420_rgb)
/*
 * I420_R5G5B5: convert the planar YUV 4:2:0 picture p_src into 16-bit pixels
 * written to p_dest, using the *_UNPACK_15 macro variants. Compiled only in
 * the MMX and SSE2 builds (the #else side of MODULE_NAME_IS_i420_rgb).
 *
 * p_filter supplies the input/output dimensions (fmt_in / fmt_out) and the
 * private conversion buffer and offset array (p_sys). Nothing is returned;
 * the result is the content of p_dest.
 *
 * The SSE2 build handles 16 pixels per SSE2_CALL (p_y += 16, p_u/p_v += 8);
 * the MMX build handles 8 pixels per MMX_CALL (p_y += 8, p_u/p_v += 4).
 *
 * Locals such as i_chroma_width, p_offset and p_pic_start are never read by
 * the statements visible here; presumably SCALE_WIDTH / SCALE_HEIGHT and the
 * SSE2_* / MMX_* macros use them by name -- TODO confirm against the headers.
 */
300 void I420_R5G5B5( filter_t *p_filter, picture_t *p_src,
301 picture_t *p_dest )
303 /* We got this one from the old arguments */
304 uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
305 uint8_t *p_y = p_src->Y_PIXELS;
306 uint8_t *p_u = p_src->U_PIXELS;
307 uint8_t *p_v = p_src->V_PIXELS;
309 bool b_hscale; /* horizontal scaling type */
310 unsigned int i_vscale; /* vertical scaling type */
311 unsigned int i_x, i_y; /* horizontal and vertical indexes */
313 int i_right_margin;
314 int i_rewind;
315 int i_scale_count; /* scale modulo counter */
316 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
317 uint16_t * p_pic_start; /* beginning of the current line for copy */
319 /* Conversion buffer pointer */
320 uint16_t * p_buffer_start = (uint16_t*)p_filter->p_sys->p_buffer;
321 uint16_t * p_buffer;
323 /* Offset array pointer */
324 int * p_offset_start = p_filter->p_sys->p_offset;
325 int * p_offset;
/* Bytes between the visible end of a source line and the start of the next
 * one, for the luma plane (p[0]) and for the first chroma plane (p[1]) */
327 const int i_source_margin = p_src->p[0].i_pitch
328 - p_src->p[0].i_visible_pitch;
329 const int i_source_margin_c = p_src->p[1].i_pitch
330 - p_src->p[1].i_visible_pitch;
/* Destination line padding in bytes */
332 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
334 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
335 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
336 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
/* SetOffset fills b_hscale, i_vscale and the offset array from the
 * input and output dimensions */
337 SetOffset( p_filter->fmt_in.video.i_width,
338 p_filter->fmt_in.video.i_height,
339 p_filter->fmt_out.video.i_width,
340 p_filter->fmt_out.video.i_height,
341 &b_hscale, &i_vscale, p_offset_start );
345 * Perform conversion
/* The scale counter starts from the output height when i_vscale == 1 and
 * from the input height otherwise */
347 i_scale_count = ( i_vscale == 1 ) ?
348 p_filter->fmt_out.video.i_height :
349 p_filter->fmt_in.video.i_height;
351 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
/* Number of pixels missing to reach the next multiple of 16;
 * 0 when the input width is already a multiple of 16 */
353 i_rewind = (-p_filter->fmt_in.video.i_width) & 15;
356 ** SSE2 128 bits fetch/store instructions are faster
357 ** if memory access is 16 bytes aligned
/* p_buffer is needed up front because its address is part of the
 * alignment test below */
360 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* True only when both pitches and both start addresses are multiples
 * of 16 */
361 if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
362 p_dest->p->i_pitch|
363 ((intptr_t)p_y)|
364 ((intptr_t)p_buffer))) )
366 /* use faster SSE2 aligned fetch and store */
367 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
369 p_pic_start = p_pic;
371 for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
373 SSE2_CALL (
374 SSE2_INIT_16_ALIGNED
375 SSE2_YUV_MUL
376 SSE2_YUV_ADD
377 SSE2_UNPACK_15_ALIGNED
379 p_y += 16;
380 p_u += 8;
381 p_v += 8;
382 p_buffer += 16;
384 /* Here we do some unaligned reads and duplicate conversions, but
385 * at least we have all the pixels */
/* i_rewind is 1..15 here, so the rewound pointers are no longer multiples
 * of 16: the UNALIGNED macro variants are used for this last group */
386 if( i_rewind )
388 p_y -= i_rewind;
389 p_u -= i_rewind >> 1;
390 p_v -= i_rewind >> 1;
391 p_buffer -= i_rewind;
393 SSE2_CALL (
394 SSE2_INIT_16_UNALIGNED
395 SSE2_YUV_MUL
396 SSE2_YUV_ADD
397 SSE2_UNPACK_15_UNALIGNED
/* NOTE(review): p_buffer is not advanced after this last group, unlike in
 * the MMX branch; it is reassigned at the end of the line. Confirm that
 * SCALE_WIDTH does not depend on its position. */
399 p_y += 16;
400 p_u += 8;
401 p_v += 8;
403 SCALE_WIDTH;
404 SCALE_HEIGHT( 420, 2 );
/* Skip the source padding: luma after every line, chroma only after odd
 * lines */
406 p_y += i_source_margin;
407 if( i_y % 2 )
409 p_u += i_source_margin_c;
410 p_v += i_source_margin_c;
/* Reset the conversion target for the next line */
412 p_buffer = b_hscale ? p_buffer_start : p_pic;
415 else
417 /* use slower SSE2 unaligned fetch and store */
418 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
420 p_pic_start = p_pic;
421 p_buffer = b_hscale ? p_buffer_start : p_pic;
423 for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
425 SSE2_CALL (
426 SSE2_INIT_16_UNALIGNED
427 SSE2_YUV_MUL
428 SSE2_YUV_ADD
429 SSE2_UNPACK_15_UNALIGNED
431 p_y += 16;
432 p_u += 8;
433 p_v += 8;
434 p_buffer += 16;
436 /* Here we do some unaligned reads and duplicate conversions, but
437 * at least we have all the pixels */
438 if( i_rewind )
440 p_y -= i_rewind;
441 p_u -= i_rewind >> 1;
442 p_v -= i_rewind >> 1;
443 p_buffer -= i_rewind;
445 SSE2_CALL (
446 SSE2_INIT_16_UNALIGNED
447 SSE2_YUV_MUL
448 SSE2_YUV_ADD
449 SSE2_UNPACK_15_UNALIGNED
451 p_y += 16;
452 p_u += 8;
453 p_v += 8;
455 SCALE_WIDTH;
456 SCALE_HEIGHT( 420, 2 );
/* Skip the source padding: luma after every line, chroma only after odd
 * lines */
458 p_y += i_source_margin;
459 if( i_y % 2 )
461 p_u += i_source_margin_c;
462 p_v += i_source_margin_c;
464 p_buffer = b_hscale ? p_buffer_start : p_pic;
468 /* make sure all SSE2 stores are visible thereafter */
469 SSE2_END;
471 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
/* Number of pixels missing to reach the next multiple of 8;
 * 0 when the input width is already a multiple of 8 */
473 i_rewind = (-p_filter->fmt_in.video.i_width) & 7;
/* One iteration per source line */
475 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
477 p_pic_start = p_pic;
/* Convert into the intermediate buffer when b_hscale is set, otherwise
 * straight into the destination picture */
478 p_buffer = b_hscale ? p_buffer_start : p_pic;
480 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
482 MMX_CALL (
483 MMX_INIT_16
484 MMX_YUV_MUL
485 MMX_YUV_ADD
486 MMX_UNPACK_15
488 p_y += 8;
489 p_u += 4;
490 p_v += 4;
491 p_buffer += 8;
494 /* Here we do some unaligned reads and duplicate conversions, but
495 * at least we have all the pixels */
/* Back the luma/output pointers up by i_rewind and the chroma pointers by
 * half of that, then convert one more full group of 8 */
496 if( i_rewind )
498 p_y -= i_rewind;
499 p_u -= i_rewind >> 1;
500 p_v -= i_rewind >> 1;
501 p_buffer -= i_rewind;
503 MMX_CALL (
504 MMX_INIT_16
505 MMX_YUV_MUL
506 MMX_YUV_ADD
507 MMX_UNPACK_15
509 p_y += 8;
510 p_u += 4;
511 p_v += 4;
512 p_buffer += 8;
514 SCALE_WIDTH;
515 SCALE_HEIGHT( 420, 2 );
/* Skip the source padding: luma after every line, chroma only after odd
 * lines */
517 p_y += i_source_margin;
518 if( i_y % 2 )
520 p_u += i_source_margin_c;
521 p_v += i_source_margin_c;
524 /* re-enable FPU registers */
525 MMX_END;
527 #endif
/*
 * I420_R5G6B5: convert the planar YUV 4:2:0 picture p_src into 16-bit pixels
 * written to p_dest, using the *_UNPACK_16 macro variants. Compiled only in
 * the MMX and SSE2 builds.
 *
 * p_filter supplies the input/output dimensions (fmt_in / fmt_out) and the
 * private conversion buffer and offset array (p_sys). Nothing is returned;
 * the result is the content of p_dest.
 *
 * The SSE2 build handles 16 pixels per SSE2_CALL (p_y += 16, p_u/p_v += 8);
 * the MMX build handles 8 pixels per MMX_CALL (p_y += 8, p_u/p_v += 4).
 *
 * Locals such as i_chroma_width, p_offset and p_pic_start are never read by
 * the statements visible here; presumably SCALE_WIDTH / SCALE_HEIGHT and the
 * SSE2_* / MMX_* macros use them by name -- TODO confirm against the headers.
 */
530 void I420_R5G6B5( filter_t *p_filter, picture_t *p_src,
531 picture_t *p_dest )
533 /* We got this one from the old arguments */
534 uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
535 uint8_t *p_y = p_src->Y_PIXELS;
536 uint8_t *p_u = p_src->U_PIXELS;
537 uint8_t *p_v = p_src->V_PIXELS;
539 bool b_hscale; /* horizontal scaling type */
540 unsigned int i_vscale; /* vertical scaling type */
541 unsigned int i_x, i_y; /* horizontal and vertical indexes */
543 int i_right_margin;
544 int i_rewind;
545 int i_scale_count; /* scale modulo counter */
546 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
547 uint16_t * p_pic_start; /* beginning of the current line for copy */
549 /* Conversion buffer pointer */
550 uint16_t * p_buffer_start = (uint16_t*)p_filter->p_sys->p_buffer;
551 uint16_t * p_buffer;
553 /* Offset array pointer */
554 int * p_offset_start = p_filter->p_sys->p_offset;
555 int * p_offset;
/* Bytes between the visible end of a source line and the start of the next
 * one, for the luma plane (p[0]) and for the first chroma plane (p[1]) */
557 const int i_source_margin = p_src->p[0].i_pitch
558 - p_src->p[0].i_visible_pitch;
559 const int i_source_margin_c = p_src->p[1].i_pitch
560 - p_src->p[1].i_visible_pitch;
/* Destination line padding in bytes */
562 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
564 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
565 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
566 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
/* SetOffset fills b_hscale, i_vscale and the offset array from the
 * input and output dimensions */
567 SetOffset( p_filter->fmt_in.video.i_width,
568 p_filter->fmt_in.video.i_height,
569 p_filter->fmt_out.video.i_width,
570 p_filter->fmt_out.video.i_height,
571 &b_hscale, &i_vscale, p_offset_start );
575 * Perform conversion
/* The scale counter starts from the output height when i_vscale == 1 and
 * from the input height otherwise */
577 i_scale_count = ( i_vscale == 1 ) ?
578 p_filter->fmt_out.video.i_height :
579 p_filter->fmt_in.video.i_height;
581 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
/* Number of pixels missing to reach the next multiple of 16;
 * 0 when the input width is already a multiple of 16 */
583 i_rewind = (-p_filter->fmt_in.video.i_width) & 15;
586 ** SSE2 128 bits fetch/store instructions are faster
587 ** if memory access is 16 bytes aligned
/* p_buffer is needed up front because its address is part of the
 * alignment test below */
590 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* True only when both pitches and both start addresses are multiples
 * of 16 */
591 if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
592 p_dest->p->i_pitch|
593 ((intptr_t)p_y)|
594 ((intptr_t)p_buffer))) )
596 /* use faster SSE2 aligned fetch and store */
597 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
599 p_pic_start = p_pic;
601 for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
603 SSE2_CALL (
604 SSE2_INIT_16_ALIGNED
605 SSE2_YUV_MUL
606 SSE2_YUV_ADD
607 SSE2_UNPACK_16_ALIGNED
609 p_y += 16;
610 p_u += 8;
611 p_v += 8;
612 p_buffer += 16;
614 /* Here we do some unaligned reads and duplicate conversions, but
615 * at least we have all the pixels */
/* i_rewind is 1..15 here, so the rewound pointers are no longer multiples
 * of 16: the UNALIGNED macro variants are used for this last group */
616 if( i_rewind )
618 p_y -= i_rewind;
619 p_u -= i_rewind >> 1;
620 p_v -= i_rewind >> 1;
621 p_buffer -= i_rewind;
623 SSE2_CALL (
624 SSE2_INIT_16_UNALIGNED
625 SSE2_YUV_MUL
626 SSE2_YUV_ADD
627 SSE2_UNPACK_16_UNALIGNED
/* NOTE(review): p_buffer is not advanced after this last group, unlike in
 * the MMX branch; it is reassigned at the end of the line. Confirm that
 * SCALE_WIDTH does not depend on its position. */
629 p_y += 16;
630 p_u += 8;
631 p_v += 8;
633 SCALE_WIDTH;
634 SCALE_HEIGHT( 420, 2 );
/* Skip the source padding: luma after every line, chroma only after odd
 * lines */
636 p_y += i_source_margin;
637 if( i_y % 2 )
639 p_u += i_source_margin_c;
640 p_v += i_source_margin_c;
/* Reset the conversion target for the next line */
642 p_buffer = b_hscale ? p_buffer_start : p_pic;
645 else
647 /* use slower SSE2 unaligned fetch and store */
648 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
650 p_pic_start = p_pic;
651 p_buffer = b_hscale ? p_buffer_start : p_pic;
653 for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
655 SSE2_CALL(
656 SSE2_INIT_16_UNALIGNED
657 SSE2_YUV_MUL
658 SSE2_YUV_ADD
659 SSE2_UNPACK_16_UNALIGNED
661 p_y += 16;
662 p_u += 8;
663 p_v += 8;
664 p_buffer += 16;
666 /* Here we do some unaligned reads and duplicate conversions, but
667 * at least we have all the pixels */
668 if( i_rewind )
670 p_y -= i_rewind;
671 p_u -= i_rewind >> 1;
672 p_v -= i_rewind >> 1;
673 p_buffer -= i_rewind;
675 SSE2_CALL(
676 SSE2_INIT_16_UNALIGNED
677 SSE2_YUV_MUL
678 SSE2_YUV_ADD
679 SSE2_UNPACK_16_UNALIGNED
681 p_y += 16;
682 p_u += 8;
683 p_v += 8;
685 SCALE_WIDTH;
686 SCALE_HEIGHT( 420, 2 );
/* Skip the source padding: luma after every line, chroma only after odd
 * lines */
688 p_y += i_source_margin;
689 if( i_y % 2 )
691 p_u += i_source_margin_c;
692 p_v += i_source_margin_c;
694 p_buffer = b_hscale ? p_buffer_start : p_pic;
698 /* make sure all SSE2 stores are visible thereafter */
699 SSE2_END;
701 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
/* Number of pixels missing to reach the next multiple of 8;
 * 0 when the input width is already a multiple of 8 */
703 i_rewind = (-p_filter->fmt_in.video.i_width) & 7;
/* One iteration per source line */
705 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
707 p_pic_start = p_pic;
/* Convert into the intermediate buffer when b_hscale is set, otherwise
 * straight into the destination picture */
708 p_buffer = b_hscale ? p_buffer_start : p_pic;
710 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
712 MMX_CALL (
713 MMX_INIT_16
714 MMX_YUV_MUL
715 MMX_YUV_ADD
716 MMX_UNPACK_16
718 p_y += 8;
719 p_u += 4;
720 p_v += 4;
721 p_buffer += 8;
724 /* Here we do some unaligned reads and duplicate conversions, but
725 * at least we have all the pixels */
/* Back the luma/output pointers up by i_rewind and the chroma pointers by
 * half of that, then convert one more full group of 8 */
726 if( i_rewind )
728 p_y -= i_rewind;
729 p_u -= i_rewind >> 1;
730 p_v -= i_rewind >> 1;
731 p_buffer -= i_rewind;
733 MMX_CALL (
734 MMX_INIT_16
735 MMX_YUV_MUL
736 MMX_YUV_ADD
737 MMX_UNPACK_16
739 p_y += 8;
740 p_u += 4;
741 p_v += 4;
742 p_buffer += 8;
744 SCALE_WIDTH;
745 SCALE_HEIGHT( 420, 2 );
/* Skip the source padding: luma after every line, chroma only after odd
 * lines */
747 p_y += i_source_margin;
748 if( i_y % 2 )
750 p_u += i_source_margin_c;
751 p_v += i_source_margin_c;
754 /* re-enable FPU registers */
755 MMX_END;
757 #endif
760 #endif
762 /*****************************************************************************
763 * I420_RGB32: color YUV 4:2:0 to RGB 32 bpp
764 *****************************************************************************
765 * Horizontal alignment needed:
766 * - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
767 * - output: 1 pixel (4 bytes), margins allowed
768 * Vertical alignment needed:
769 * - input: 2 lines (2 Y lines, 1 U/V line)
770 * - output: 1 line
771 *****************************************************************************/
773 #if defined (MODULE_NAME_IS_i420_rgb)
/*
 * I420_RGB32: convert the planar YUV 4:2:0 picture p_src into 32-bit pixels
 * written to p_dest (plain C build).
 *
 * p_filter supplies the input/output dimensions (fmt_in / fmt_out) and the
 * private 32-bit lookup table, conversion buffer and offset array (p_sys).
 * Nothing is returned; the result is the content of p_dest.
 *
 * Several locals below (i_chroma_width, i_uval, i_vval, i_red, i_green,
 * i_blue, p_ybase, p_offset, p_pic_start, ...) are never read by the
 * statements visible here. Presumably the CONVERT_*, SCALE_WIDTH and
 * SCALE_HEIGHT macros from the included headers use them by name, so they
 * must not be renamed or removed -- TODO confirm against the headers.
 */
775 void I420_RGB32( filter_t *p_filter, picture_t *p_src,
776 picture_t *p_dest )
778 /* We got this one from the old arguments */
779 uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
780 uint8_t *p_y = p_src->Y_PIXELS;
781 uint8_t *p_u = p_src->U_PIXELS;
782 uint8_t *p_v = p_src->V_PIXELS;
784 bool b_hscale; /* horizontal scaling type */
785 unsigned int i_vscale; /* vertical scaling type */
786 unsigned int i_x, i_y; /* horizontal and vertical indexes */
788 int i_right_margin;
789 int i_rewind;
790 int i_scale_count; /* scale modulo counter */
791 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
792 uint32_t * p_pic_start; /* beginning of the current line for copy */
793 int i_uval, i_vval; /* U and V samples */
794 int i_red, i_green, i_blue; /* U and V modified samples */
795 uint32_t * p_yuv = p_filter->p_sys->p_rgb32;
796 uint32_t * p_ybase; /* Y dependent conversion table */
798 /* Conversion buffer pointer */
799 uint32_t * p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
800 uint32_t * p_buffer;
802 /* Offset array pointer */
803 int * p_offset_start = p_filter->p_sys->p_offset;
804 int * p_offset;
/* Bytes between the visible end of a source line and the start of the next
 * one, for the luma plane (p[0]) and for the first chroma plane (p[1]) */
806 const int i_source_margin = p_src->p[0].i_pitch
807 - p_src->p[0].i_visible_pitch;
808 const int i_source_margin_c = p_src->p[1].i_pitch
809 - p_src->p[1].i_visible_pitch;
/* Destination line padding in bytes */
811 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
/* Number of pixels missing to reach the next multiple of 8;
 * 0 when the input width is already a multiple of 8 */
812 i_rewind = (-p_filter->fmt_in.video.i_width) & 7;
814 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
815 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
816 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
/* SetOffset fills b_hscale, i_vscale and the offset array from the
 * input and output dimensions */
817 SetOffset( p_filter->fmt_in.video.i_width,
818 p_filter->fmt_in.video.i_height,
819 p_filter->fmt_out.video.i_width,
820 p_filter->fmt_out.video.i_height,
821 &b_hscale, &i_vscale, p_offset_start );
824 * Perform conversion
/* The scale counter starts from the output height when i_vscale == 1 and
 * from the input height otherwise */
826 i_scale_count = ( i_vscale == 1 ) ?
827 p_filter->fmt_out.video.i_height :
828 p_filter->fmt_in.video.i_height;
/* One iteration per source line */
829 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
831 p_pic_start = p_pic;
/* Convert into the intermediate buffer when b_hscale is set, otherwise
 * straight into the destination picture */
832 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* Full groups of 8: eight macro invocations per iteration */
834 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
836 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
837 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
838 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
839 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
842 /* Here we do some unaligned reads and duplicate conversions, but
843 * at least we have all the pixels */
/* Back the luma/output pointers up by i_rewind and the chroma pointers by
 * half of that, then run one more full group of 8 */
844 if( i_rewind )
846 p_y -= i_rewind;
847 p_u -= i_rewind >> 1;
848 p_v -= i_rewind >> 1;
849 p_buffer -= i_rewind;
850 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
851 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
852 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
853 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
855 SCALE_WIDTH;
856 SCALE_HEIGHT( 420, 4 );
/* Skip the source padding: luma after every line, chroma only after odd
 * lines */
858 p_y += i_source_margin;
859 if( i_y % 2 )
861 p_u += i_source_margin_c;
862 p_v += i_source_margin_c;
867 #else // defined (MODULE_NAME_IS_i420_rgb_mmx) || defined (MODULE_NAME_IS_i420_rgb_sse2)
/*
 * I420_A8R8G8B8: convert the planar YUV 4:2:0 picture p_src into 32-bit
 * pixels written to p_dest, using the *_UNPACK_32_ARGB macro variants.
 * Compiled only in the MMX and SSE2 builds.
 *
 * p_filter supplies the input/output dimensions (fmt_in / fmt_out) and the
 * private conversion buffer and offset array (p_sys). Nothing is returned;
 * the result is the content of p_dest.
 *
 * The SSE2 build handles 16 pixels per SSE2_CALL (p_y += 16, p_u/p_v += 8);
 * the MMX build handles 8 pixels per MMX_CALL (p_y += 8, p_u/p_v += 4).
 *
 * Locals such as i_chroma_width, p_offset and p_pic_start are never read by
 * the statements visible here; presumably SCALE_WIDTH / SCALE_HEIGHT and the
 * SSE2_* / MMX_* macros use them by name -- TODO confirm against the headers.
 */
869 void I420_A8R8G8B8( filter_t *p_filter, picture_t *p_src,
870 picture_t *p_dest )
872 /* We got this one from the old arguments */
873 uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
874 uint8_t *p_y = p_src->Y_PIXELS;
875 uint8_t *p_u = p_src->U_PIXELS;
876 uint8_t *p_v = p_src->V_PIXELS;
878 bool b_hscale; /* horizontal scaling type */
879 unsigned int i_vscale; /* vertical scaling type */
880 unsigned int i_x, i_y; /* horizontal and vertical indexes */
882 int i_right_margin;
883 int i_rewind;
884 int i_scale_count; /* scale modulo counter */
885 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
886 uint32_t * p_pic_start; /* beginning of the current line for copy */
887 /* Conversion buffer pointer */
888 uint32_t * p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
889 uint32_t * p_buffer;
891 /* Offset array pointer */
892 int * p_offset_start = p_filter->p_sys->p_offset;
893 int * p_offset;
/* Bytes between the visible end of a source line and the start of the next
 * one, for the luma plane (p[0]) and for the first chroma plane (p[1]) */
895 const int i_source_margin = p_src->p[0].i_pitch
896 - p_src->p[0].i_visible_pitch;
897 const int i_source_margin_c = p_src->p[1].i_pitch
898 - p_src->p[1].i_visible_pitch;
/* Destination line padding in bytes */
900 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
902 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
903 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
904 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
/* SetOffset fills b_hscale, i_vscale and the offset array from the
 * input and output dimensions */
905 SetOffset( p_filter->fmt_in.video.i_width,
906 p_filter->fmt_in.video.i_height,
907 p_filter->fmt_out.video.i_width,
908 p_filter->fmt_out.video.i_height,
909 &b_hscale, &i_vscale, p_offset_start );
912 * Perform conversion
/* The scale counter starts from the output height when i_vscale == 1 and
 * from the input height otherwise */
914 i_scale_count = ( i_vscale == 1 ) ?
915 p_filter->fmt_out.video.i_height :
916 p_filter->fmt_in.video.i_height;
918 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
/* Number of pixels missing to reach the next multiple of 16;
 * 0 when the input width is already a multiple of 16 */
920 i_rewind = (-p_filter->fmt_in.video.i_width) & 15;
923 ** SSE2 128 bits fetch/store instructions are faster
924 ** if memory access is 16 bytes aligned
/* p_buffer is needed up front because its address is part of the
 * alignment test below */
927 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* True only when both pitches and both start addresses are multiples
 * of 16 */
928 if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
929 p_dest->p->i_pitch|
930 ((intptr_t)p_y)|
931 ((intptr_t)p_buffer))) )
933 /* use faster SSE2 aligned fetch and store */
934 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
936 p_pic_start = p_pic;
938 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
940 SSE2_CALL (
941 SSE2_INIT_32_ALIGNED
942 SSE2_YUV_MUL
943 SSE2_YUV_ADD
944 SSE2_UNPACK_32_ARGB_ALIGNED
946 p_y += 16;
947 p_u += 8;
948 p_v += 8;
949 p_buffer += 16;
952 /* Here we do some unaligned reads and duplicate conversions, but
953 * at least we have all the pixels */
/* i_rewind is 1..15 here, so the rewound pointers are no longer multiples
 * of 16: the UNALIGNED macro variants are used for this last group */
954 if( i_rewind )
956 p_y -= i_rewind;
957 p_u -= i_rewind >> 1;
958 p_v -= i_rewind >> 1;
959 p_buffer -= i_rewind;
960 SSE2_CALL (
961 SSE2_INIT_32_UNALIGNED
962 SSE2_YUV_MUL
963 SSE2_YUV_ADD
964 SSE2_UNPACK_32_ARGB_UNALIGNED
/* This last group covers 16 pixels, i.e. 8 chroma samples per plane, so the
 * chroma pointers advance by 8 exactly as in the main loop and in the
 * unaligned path; advancing by 4 would leave them short of the line end */
966 p_y += 16;
967 p_u += 8;
968 p_v += 8;
970 SCALE_WIDTH;
971 SCALE_HEIGHT( 420, 4 );
/* Skip the source padding: luma after every line, chroma only after odd
 * lines */
973 p_y += i_source_margin;
974 if( i_y % 2 )
976 p_u += i_source_margin_c;
977 p_v += i_source_margin_c;
/* Reset the conversion target for the next line */
979 p_buffer = b_hscale ? p_buffer_start : p_pic;
982 else
984 /* use slower SSE2 unaligned fetch and store */
985 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
987 p_pic_start = p_pic;
988 p_buffer = b_hscale ? p_buffer_start : p_pic;
990 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
992 SSE2_CALL (
993 SSE2_INIT_32_UNALIGNED
994 SSE2_YUV_MUL
995 SSE2_YUV_ADD
996 SSE2_UNPACK_32_ARGB_UNALIGNED
998 p_y += 16;
999 p_u += 8;
1000 p_v += 8;
1001 p_buffer += 16;
1004 /* Here we do some unaligned reads and duplicate conversions, but
1005 * at least we have all the pixels */
1006 if( i_rewind )
1008 p_y -= i_rewind;
1009 p_u -= i_rewind >> 1;
1010 p_v -= i_rewind >> 1;
1011 p_buffer -= i_rewind;
1012 SSE2_CALL (
1013 SSE2_INIT_32_UNALIGNED
1014 SSE2_YUV_MUL
1015 SSE2_YUV_ADD
1016 SSE2_UNPACK_32_ARGB_UNALIGNED
1018 p_y += 16;
1019 p_u += 8;
1020 p_v += 8;
1022 SCALE_WIDTH;
1023 SCALE_HEIGHT( 420, 4 );
/* Skip the source padding: luma after every line, chroma only after odd
 * lines */
1025 p_y += i_source_margin;
1026 if( i_y % 2 )
1028 p_u += i_source_margin_c;
1029 p_v += i_source_margin_c;
1031 p_buffer = b_hscale ? p_buffer_start : p_pic;
1035 /* make sure all SSE2 stores are visible thereafter */
1036 SSE2_END;
1038 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
/* Number of pixels missing to reach the next multiple of 8;
 * 0 when the input width is already a multiple of 8 */
1040 i_rewind = (-p_filter->fmt_in.video.i_width) & 7;
/* One iteration per source line */
1042 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1044 p_pic_start = p_pic;
/* Convert into the intermediate buffer when b_hscale is set, otherwise
 * straight into the destination picture */
1045 p_buffer = b_hscale ? p_buffer_start : p_pic;
1047 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
1049 MMX_CALL (
1050 MMX_INIT_32
1051 MMX_YUV_MUL
1052 MMX_YUV_ADD
1053 MMX_UNPACK_32_ARGB
1055 p_y += 8;
1056 p_u += 4;
1057 p_v += 4;
1058 p_buffer += 8;
1061 /* Here we do some unaligned reads and duplicate conversions, but
1062 * at least we have all the pixels */
/* Back the luma/output pointers up by i_rewind and the chroma pointers by
 * half of that, then convert one more full group of 8 */
1063 if( i_rewind )
1065 p_y -= i_rewind;
1066 p_u -= i_rewind >> 1;
1067 p_v -= i_rewind >> 1;
1068 p_buffer -= i_rewind;
1069 MMX_CALL (
1070 MMX_INIT_32
1071 MMX_YUV_MUL
1072 MMX_YUV_ADD
1073 MMX_UNPACK_32_ARGB
1075 p_y += 8;
1076 p_u += 4;
1077 p_v += 4;
1078 p_buffer += 8;
1080 SCALE_WIDTH;
1081 SCALE_HEIGHT( 420, 4 );
/* Skip the source padding: luma after every line, chroma only after odd
 * lines */
1083 p_y += i_source_margin;
1084 if( i_y % 2 )
1086 p_u += i_source_margin_c;
1087 p_v += i_source_margin_c;
1091 /* re-enable FPU registers */
1092 MMX_END;
1094 #endif
1097 void I420_R8G8B8A8( filter_t *p_filter, picture_t *p_src,
1098 picture_t *p_dest )
1100 /* We got this one from the old arguments */
1101 uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
1102 uint8_t *p_y = p_src->Y_PIXELS;
1103 uint8_t *p_u = p_src->U_PIXELS;
1104 uint8_t *p_v = p_src->V_PIXELS;
1106 bool b_hscale; /* horizontal scaling type */
1107 unsigned int i_vscale; /* vertical scaling type */
1108 unsigned int i_x, i_y; /* horizontal and vertical indexes */
1110 int i_right_margin;
1111 int i_rewind;
1112 int i_scale_count; /* scale modulo counter */
1113 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
1114 uint32_t * p_pic_start; /* beginning of the current line for copy */
1115 /* Conversion buffer pointer */
1116 uint32_t * p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
1117 uint32_t * p_buffer;
1119 /* Offset array pointer */
1120 int * p_offset_start = p_filter->p_sys->p_offset;
1121 int * p_offset;
1123 const int i_source_margin = p_src->p[0].i_pitch
1124 - p_src->p[0].i_visible_pitch;
1125 const int i_source_margin_c = p_src->p[1].i_pitch
1126 - p_src->p[1].i_visible_pitch;
1128 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
1130 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
1131 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
1132 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
1133 SetOffset( p_filter->fmt_in.video.i_width,
1134 p_filter->fmt_in.video.i_height,
1135 p_filter->fmt_out.video.i_width,
1136 p_filter->fmt_out.video.i_height,
1137 &b_hscale, &i_vscale, p_offset_start );
1140 * Perform conversion
1142 i_scale_count = ( i_vscale == 1 ) ?
1143 p_filter->fmt_out.video.i_height :
1144 p_filter->fmt_in.video.i_height;
1146 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
1148 i_rewind = (-p_filter->fmt_in.video.i_width) & 15;
1151 ** SSE2 128 bits fetch/store instructions are faster
1152 ** if memory access is 16 bytes aligned
1155 p_buffer = b_hscale ? p_buffer_start : p_pic;
1156 if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
1157 p_dest->p->i_pitch|
1158 ((intptr_t)p_y)|
1159 ((intptr_t)p_buffer))) )
1161 /* use faster SSE2 aligned fetch and store */
1162 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1164 p_pic_start = p_pic;
1166 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
1168 SSE2_CALL (
1169 SSE2_INIT_32_ALIGNED
1170 SSE2_YUV_MUL
1171 SSE2_YUV_ADD
1172 SSE2_UNPACK_32_RGBA_ALIGNED
1174 p_y += 16;
1175 p_u += 8;
1176 p_v += 8;
1177 p_buffer += 16;
1180 /* Here we do some unaligned reads and duplicate conversions, but
1181 * at least we have all the pixels */
1182 if( i_rewind )
1184 p_y -= i_rewind;
1185 p_u -= i_rewind >> 1;
1186 p_v -= i_rewind >> 1;
1187 p_buffer -= i_rewind;
1188 SSE2_CALL (
1189 SSE2_INIT_32_UNALIGNED
1190 SSE2_YUV_MUL
1191 SSE2_YUV_ADD
1192 SSE2_UNPACK_32_RGBA_UNALIGNED
1194 p_y += 16;
1195 p_u += 4;
1196 p_v += 4;
1198 SCALE_WIDTH;
1199 SCALE_HEIGHT( 420, 4 );
1201 p_y += i_source_margin;
1202 if( i_y % 2 )
1204 p_u += i_source_margin_c;
1205 p_v += i_source_margin_c;
1207 p_buffer = b_hscale ? p_buffer_start : p_pic;
1210 else
1212 /* use slower SSE2 unaligned fetch and store */
1213 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1215 p_pic_start = p_pic;
1216 p_buffer = b_hscale ? p_buffer_start : p_pic;
1218 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
1220 SSE2_CALL (
1221 SSE2_INIT_32_UNALIGNED
1222 SSE2_YUV_MUL
1223 SSE2_YUV_ADD
1224 SSE2_UNPACK_32_RGBA_UNALIGNED
1226 p_y += 16;
1227 p_u += 8;
1228 p_v += 8;
1229 p_buffer += 16;
1232 /* Here we do some unaligned reads and duplicate conversions, but
1233 * at least we have all the pixels */
1234 if( i_rewind )
1236 p_y -= i_rewind;
1237 p_u -= i_rewind >> 1;
1238 p_v -= i_rewind >> 1;
1239 p_buffer -= i_rewind;
1240 SSE2_CALL (
1241 SSE2_INIT_32_UNALIGNED
1242 SSE2_YUV_MUL
1243 SSE2_YUV_ADD
1244 SSE2_UNPACK_32_RGBA_UNALIGNED
1246 p_y += 16;
1247 p_u += 8;
1248 p_v += 8;
1250 SCALE_WIDTH;
1251 SCALE_HEIGHT( 420, 4 );
1253 p_y += i_source_margin;
1254 if( i_y % 2 )
1256 p_u += i_source_margin_c;
1257 p_v += i_source_margin_c;
1259 p_buffer = b_hscale ? p_buffer_start : p_pic;
1263 /* make sure all SSE2 stores are visible thereafter */
1264 SSE2_END;
1266 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
1268 i_rewind = (-p_filter->fmt_in.video.i_width) & 7;
1270 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1272 p_pic_start = p_pic;
1273 p_buffer = b_hscale ? p_buffer_start : p_pic;
1275 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
1277 MMX_CALL (
1278 MMX_INIT_32
1279 MMX_YUV_MUL
1280 MMX_YUV_ADD
1281 MMX_UNPACK_32_RGBA
1283 p_y += 8;
1284 p_u += 4;
1285 p_v += 4;
1286 p_buffer += 8;
1289 /* Here we do some unaligned reads and duplicate conversions, but
1290 * at least we have all the pixels */
1291 if( i_rewind )
1293 p_y -= i_rewind;
1294 p_u -= i_rewind >> 1;
1295 p_v -= i_rewind >> 1;
1296 p_buffer -= i_rewind;
1297 MMX_CALL (
1298 MMX_INIT_32
1299 MMX_YUV_MUL
1300 MMX_YUV_ADD
1301 MMX_UNPACK_32_RGBA
1303 p_y += 8;
1304 p_u += 4;
1305 p_v += 4;
1306 p_buffer += 8;
1308 SCALE_WIDTH;
1309 SCALE_HEIGHT( 420, 4 );
1311 p_y += i_source_margin;
1312 if( i_y % 2 )
1314 p_u += i_source_margin_c;
1315 p_v += i_source_margin_c;
1319 /* re-enable FPU registers */
1320 MMX_END;
1322 #endif
1325 void I420_B8G8R8A8( filter_t *p_filter, picture_t *p_src,
1326 picture_t *p_dest )
1328 /* We got this one from the old arguments */
1329 uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
1330 uint8_t *p_y = p_src->Y_PIXELS;
1331 uint8_t *p_u = p_src->U_PIXELS;
1332 uint8_t *p_v = p_src->V_PIXELS;
1334 bool b_hscale; /* horizontal scaling type */
1335 unsigned int i_vscale; /* vertical scaling type */
1336 unsigned int i_x, i_y; /* horizontal and vertical indexes */
1338 int i_right_margin;
1339 int i_rewind;
1340 int i_scale_count; /* scale modulo counter */
1341 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
1342 uint32_t * p_pic_start; /* beginning of the current line for copy */
1343 /* Conversion buffer pointer */
1344 uint32_t * p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
1345 uint32_t * p_buffer;
1347 /* Offset array pointer */
1348 int * p_offset_start = p_filter->p_sys->p_offset;
1349 int * p_offset;
1351 const int i_source_margin = p_src->p[0].i_pitch
1352 - p_src->p[0].i_visible_pitch;
1353 const int i_source_margin_c = p_src->p[1].i_pitch
1354 - p_src->p[1].i_visible_pitch;
1356 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
1358 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
1359 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
1360 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
1361 SetOffset( p_filter->fmt_in.video.i_width,
1362 p_filter->fmt_in.video.i_height,
1363 p_filter->fmt_out.video.i_width,
1364 p_filter->fmt_out.video.i_height,
1365 &b_hscale, &i_vscale, p_offset_start );
1368 * Perform conversion
1370 i_scale_count = ( i_vscale == 1 ) ?
1371 p_filter->fmt_out.video.i_height :
1372 p_filter->fmt_in.video.i_height;
1374 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
1376 i_rewind = (-p_filter->fmt_in.video.i_width) & 15;
1379 ** SSE2 128 bits fetch/store instructions are faster
1380 ** if memory access is 16 bytes aligned
1383 p_buffer = b_hscale ? p_buffer_start : p_pic;
1384 if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
1385 p_dest->p->i_pitch|
1386 ((intptr_t)p_y)|
1387 ((intptr_t)p_buffer))) )
1389 /* use faster SSE2 aligned fetch and store */
1390 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1392 p_pic_start = p_pic;
1394 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
1396 SSE2_CALL (
1397 SSE2_INIT_32_ALIGNED
1398 SSE2_YUV_MUL
1399 SSE2_YUV_ADD
1400 SSE2_UNPACK_32_BGRA_ALIGNED
1402 p_y += 16;
1403 p_u += 8;
1404 p_v += 8;
1405 p_buffer += 16;
1408 /* Here we do some unaligned reads and duplicate conversions, but
1409 * at least we have all the pixels */
1410 if( i_rewind )
1412 p_y -= i_rewind;
1413 p_u -= i_rewind >> 1;
1414 p_v -= i_rewind >> 1;
1415 p_buffer -= i_rewind;
1416 SSE2_CALL (
1417 SSE2_INIT_32_UNALIGNED
1418 SSE2_YUV_MUL
1419 SSE2_YUV_ADD
1420 SSE2_UNPACK_32_BGRA_UNALIGNED
1422 p_y += 16;
1423 p_u += 4;
1424 p_v += 4;
1426 SCALE_WIDTH;
1427 SCALE_HEIGHT( 420, 4 );
1429 p_y += i_source_margin;
1430 if( i_y % 2 )
1432 p_u += i_source_margin_c;
1433 p_v += i_source_margin_c;
1435 p_buffer = b_hscale ? p_buffer_start : p_pic;
1438 else
1440 /* use slower SSE2 unaligned fetch and store */
1441 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1443 p_pic_start = p_pic;
1444 p_buffer = b_hscale ? p_buffer_start : p_pic;
1446 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
1448 SSE2_CALL (
1449 SSE2_INIT_32_UNALIGNED
1450 SSE2_YUV_MUL
1451 SSE2_YUV_ADD
1452 SSE2_UNPACK_32_BGRA_UNALIGNED
1454 p_y += 16;
1455 p_u += 8;
1456 p_v += 8;
1457 p_buffer += 16;
1460 /* Here we do some unaligned reads and duplicate conversions, but
1461 * at least we have all the pixels */
1462 if( i_rewind )
1464 p_y -= i_rewind;
1465 p_u -= i_rewind >> 1;
1466 p_v -= i_rewind >> 1;
1467 p_buffer -= i_rewind;
1468 SSE2_CALL (
1469 SSE2_INIT_32_UNALIGNED
1470 SSE2_YUV_MUL
1471 SSE2_YUV_ADD
1472 SSE2_UNPACK_32_BGRA_UNALIGNED
1474 p_y += 16;
1475 p_u += 8;
1476 p_v += 8;
1478 SCALE_WIDTH;
1479 SCALE_HEIGHT( 420, 4 );
1481 p_y += i_source_margin;
1482 if( i_y % 2 )
1484 p_u += i_source_margin_c;
1485 p_v += i_source_margin_c;
1487 p_buffer = b_hscale ? p_buffer_start : p_pic;
1491 #else
1493 i_rewind = (-p_filter->fmt_in.video.i_width) & 7;
1495 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1497 p_pic_start = p_pic;
1498 p_buffer = b_hscale ? p_buffer_start : p_pic;
1500 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
1502 MMX_CALL (
1503 MMX_INIT_32
1504 MMX_YUV_MUL
1505 MMX_YUV_ADD
1506 MMX_UNPACK_32_BGRA
1508 p_y += 8;
1509 p_u += 4;
1510 p_v += 4;
1511 p_buffer += 8;
1514 /* Here we do some unaligned reads and duplicate conversions, but
1515 * at least we have all the pixels */
1516 if( i_rewind )
1518 p_y -= i_rewind;
1519 p_u -= i_rewind >> 1;
1520 p_v -= i_rewind >> 1;
1521 p_buffer -= i_rewind;
1522 MMX_CALL (
1523 MMX_INIT_32
1524 MMX_YUV_MUL
1525 MMX_YUV_ADD
1526 MMX_UNPACK_32_BGRA
1528 p_y += 8;
1529 p_u += 4;
1530 p_v += 4;
1531 p_buffer += 8;
1533 SCALE_WIDTH;
1534 SCALE_HEIGHT( 420, 4 );
1536 p_y += i_source_margin;
1537 if( i_y % 2 )
1539 p_u += i_source_margin_c;
1540 p_v += i_source_margin_c;
1544 /* re-enable FPU registers */
1545 MMX_END;
1547 #endif
1550 void I420_A8B8G8R8( filter_t *p_filter, picture_t *p_src,
1551 picture_t *p_dest )
1553 /* We got this one from the old arguments */
1554 uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
1555 uint8_t *p_y = p_src->Y_PIXELS;
1556 uint8_t *p_u = p_src->U_PIXELS;
1557 uint8_t *p_v = p_src->V_PIXELS;
1559 bool b_hscale; /* horizontal scaling type */
1560 unsigned int i_vscale; /* vertical scaling type */
1561 unsigned int i_x, i_y; /* horizontal and vertical indexes */
1563 int i_right_margin;
1564 int i_rewind;
1565 int i_scale_count; /* scale modulo counter */
1566 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
1567 uint32_t * p_pic_start; /* beginning of the current line for copy */
1568 /* Conversion buffer pointer */
1569 uint32_t * p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
1570 uint32_t * p_buffer;
1572 /* Offset array pointer */
1573 int * p_offset_start = p_filter->p_sys->p_offset;
1574 int * p_offset;
1576 const int i_source_margin = p_src->p[0].i_pitch
1577 - p_src->p[0].i_visible_pitch;
1578 const int i_source_margin_c = p_src->p[1].i_pitch
1579 - p_src->p[1].i_visible_pitch;
1581 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
1583 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
1584 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
1585 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
1586 SetOffset( p_filter->fmt_in.video.i_width,
1587 p_filter->fmt_in.video.i_height,
1588 p_filter->fmt_out.video.i_width,
1589 p_filter->fmt_out.video.i_height,
1590 &b_hscale, &i_vscale, p_offset_start );
1593 * Perform conversion
1595 i_scale_count = ( i_vscale == 1 ) ?
1596 p_filter->fmt_out.video.i_height :
1597 p_filter->fmt_in.video.i_height;
1599 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
1601 i_rewind = (-p_filter->fmt_in.video.i_width) & 15;
1604 ** SSE2 128 bits fetch/store instructions are faster
1605 ** if memory access is 16 bytes aligned
1608 p_buffer = b_hscale ? p_buffer_start : p_pic;
1609 if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
1610 p_dest->p->i_pitch|
1611 ((intptr_t)p_y)|
1612 ((intptr_t)p_buffer))) )
1614 /* use faster SSE2 aligned fetch and store */
1615 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1617 p_pic_start = p_pic;
1619 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
1621 SSE2_CALL (
1622 SSE2_INIT_32_ALIGNED
1623 SSE2_YUV_MUL
1624 SSE2_YUV_ADD
1625 SSE2_UNPACK_32_ABGR_ALIGNED
1627 p_y += 16;
1628 p_u += 8;
1629 p_v += 8;
1630 p_buffer += 16;
1633 /* Here we do some unaligned reads and duplicate conversions, but
1634 * at least we have all the pixels */
1635 if( i_rewind )
1637 p_y -= i_rewind;
1638 p_u -= i_rewind >> 1;
1639 p_v -= i_rewind >> 1;
1640 p_buffer -= i_rewind;
1641 SSE2_CALL (
1642 SSE2_INIT_32_UNALIGNED
1643 SSE2_YUV_MUL
1644 SSE2_YUV_ADD
1645 SSE2_UNPACK_32_ABGR_UNALIGNED
1647 p_y += 16;
1648 p_u += 4;
1649 p_v += 4;
1651 SCALE_WIDTH;
1652 SCALE_HEIGHT( 420, 4 );
1654 p_y += i_source_margin;
1655 if( i_y % 2 )
1657 p_u += i_source_margin_c;
1658 p_v += i_source_margin_c;
1660 p_buffer = b_hscale ? p_buffer_start : p_pic;
1663 else
1665 /* use slower SSE2 unaligned fetch and store */
1666 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1668 p_pic_start = p_pic;
1669 p_buffer = b_hscale ? p_buffer_start : p_pic;
1671 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
1673 SSE2_CALL (
1674 SSE2_INIT_32_UNALIGNED
1675 SSE2_YUV_MUL
1676 SSE2_YUV_ADD
1677 SSE2_UNPACK_32_ABGR_UNALIGNED
1679 p_y += 16;
1680 p_u += 8;
1681 p_v += 8;
1682 p_buffer += 16;
1685 /* Here we do some unaligned reads and duplicate conversions, but
1686 * at least we have all the pixels */
1687 if( i_rewind )
1689 p_y -= i_rewind;
1690 p_u -= i_rewind >> 1;
1691 p_v -= i_rewind >> 1;
1692 p_buffer -= i_rewind;
1693 SSE2_CALL (
1694 SSE2_INIT_32_UNALIGNED
1695 SSE2_YUV_MUL
1696 SSE2_YUV_ADD
1697 SSE2_UNPACK_32_ABGR_UNALIGNED
1699 p_y += 16;
1700 p_u += 8;
1701 p_v += 8;
1703 SCALE_WIDTH;
1704 SCALE_HEIGHT( 420, 4 );
1706 p_y += i_source_margin;
1707 if( i_y % 2 )
1709 p_u += i_source_margin_c;
1710 p_v += i_source_margin_c;
1712 p_buffer = b_hscale ? p_buffer_start : p_pic;
1716 #else
1718 i_rewind = (-p_filter->fmt_in.video.i_width) & 7;
1720 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1722 p_pic_start = p_pic;
1723 p_buffer = b_hscale ? p_buffer_start : p_pic;
1725 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
1727 MMX_CALL (
1728 MMX_INIT_32
1729 MMX_YUV_MUL
1730 MMX_YUV_ADD
1731 MMX_UNPACK_32_ABGR
1733 p_y += 8;
1734 p_u += 4;
1735 p_v += 4;
1736 p_buffer += 8;
1739 /* Here we do some unaligned reads and duplicate conversions, but
1740 * at least we have all the pixels */
1741 if( i_rewind )
1743 p_y -= i_rewind;
1744 p_u -= i_rewind >> 1;
1745 p_v -= i_rewind >> 1;
1746 p_buffer -= i_rewind;
1747 MMX_CALL (
1748 MMX_INIT_32
1749 MMX_YUV_MUL
1750 MMX_YUV_ADD
1751 MMX_UNPACK_32_ABGR
1753 p_y += 8;
1754 p_u += 4;
1755 p_v += 4;
1756 p_buffer += 8;
1758 SCALE_WIDTH;
1759 SCALE_HEIGHT( 420, 4 );
1761 p_y += i_source_margin;
1762 if( i_y % 2 )
1764 p_u += i_source_margin_c;
1765 p_v += i_source_margin_c;
1769 /* re-enable FPU registers */
1770 MMX_END;
1772 #endif
1775 #endif
1777 /* Following functions are local */
/*****************************************************************************
 * SetOffset: build offset array for conversion functions
 *****************************************************************************
 * Fills the horizontal offset table used by the conversion functions and
 * reports which kind of horizontal / vertical scaling is required.
 *
 * i_width, i_height:         source picture size
 * i_pic_width, i_pic_height: destination picture size
 * pb_hscale: set to false when both widths are equal, true otherwise
 * pi_vscale: set to 0 (same height), 1 (destination taller) or
 *            (unsigned)-1 (destination shorter)
 * p_offset:  table to fill; nothing is written when the widths are equal,
 *            otherwise exactly i_pic_width entries are written:
 *             - extension: each source column yields zero or more 0 entries
 *               followed by a single 1;
 *             - reduction: each destination column gets a count >= 1, the
 *               counts adding up to i_width.
 *            (presumably the number of source pixels to step over after each
 *            destination pixel -- confirm against the SCALE_WIDTH macro)
 *****************************************************************************/
static void SetOffset( int i_width, int i_height, int i_pic_width,
                       int i_pic_height, bool *pb_hscale,
                       unsigned int *pi_vscale, int *p_offset )
{
    int i_x;                                    /* columns left to process */
    int i_scale_count;                                   /* modulo counter */

    /*
     * Prepare horizontal offset array
     */
    if( i_pic_width == i_width )
    {
        /* No horizontal scaling: YUV conversion is done directly to picture */
        *pb_hscale = false;
    }
    else if( i_pic_width > i_width )
    {
        /* Prepare scaling array for horizontal extension */
        *pb_hscale = true;
        i_scale_count = i_pic_width;
        for( i_x = i_width; i_x--; )
        {
            /* repeat the current source column while the error term allows */
            while( (i_scale_count -= i_width) > 0 )
            {
                *p_offset++ = 0;
            }
            *p_offset++ = 1;
            i_scale_count += i_pic_width;
        }
    }
    else /* i_pic_width < i_width */
    {
        /* Prepare scaling array for horizontal reduction */
        *pb_hscale = true;
        i_scale_count = i_width;
        for( i_x = i_pic_width; i_x--; )
        {
            /* every destination column covers at least one source column */
            *p_offset = 1;
            while( (i_scale_count -= i_pic_width) > 0 )
            {
                *p_offset += 1;
            }
            p_offset++;
            i_scale_count += i_width;
        }
    }

    /*
     * Set vertical scaling indicator
     */
    if( i_pic_height == i_height )
    {
        *pi_vscale = 0;
    }
    else if( i_pic_height > i_height )
    {
        *pi_vscale = 1;
    }
    else /* i_pic_height < i_height */
    {
        /* stored in an unsigned int, so this reads back as UINT_MAX */
        *pi_vscale = -1;
    }
}