skins2: fix initial text state to reflect documentation
[vlc.git] / modules / video_chroma / i420_rgb16.c
blobde3cbed9f8b191461ff776da6b0cb6305e007492
1 /*****************************************************************************
2 * i420_rgb16.c : YUV to bitmap RGB conversion module for vlc
3 *****************************************************************************
4 * Copyright (C) 2000 VLC authors and VideoLAN
5 * $Id$
7 * Authors: Samuel Hocevar <sam@zoy.org>
8 * Damien Fouilleul <damienf@videolan.org>
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU Lesser General Public License as published by
12 * the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this program; if not, write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
23 *****************************************************************************/
25 /*****************************************************************************
26 * Preamble
27 *****************************************************************************/
29 #ifdef HAVE_CONFIG_H
30 # include "config.h"
31 #endif
33 #include <vlc_common.h>
34 #include <vlc_filter.h>
35 #include <vlc_cpu.h>
37 #include "i420_rgb.h"
38 #if defined (MODULE_NAME_IS_i420_rgb)
39 # include "i420_rgb_c.h"
40 # define VLC_TARGET
41 #elif defined (MODULE_NAME_IS_i420_rgb_mmx)
42 # include "../mmx/i420_rgb_mmx.h"
43 # define VLC_TARGET VLC_MMX
44 #elif defined (MODULE_NAME_IS_i420_rgb_sse2)
45 # include "../sse2/i420_rgb_sse2.h"
46 # define VLC_TARGET VLC_SSE
47 #endif
49 static void SetOffset( int, int, int, int, bool *,
50 unsigned int *, int * );
52 #if defined (MODULE_NAME_IS_i420_rgb)
53 /*****************************************************************************
54 * I420_RGB16_dither: color YUV 4:2:0 to RGB 16 bpp with dithering
55 *****************************************************************************
56 * Horizontal alignment needed:
57 * - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
58 * - output: 1 pixel (2 bytes), margins allowed
59 * Vertical alignment needed:
60 * - input: 2 lines (2 Y lines, 1 U/V line)
61 * - output: 1 line
62 *****************************************************************************/
/*
 * I420_RGB16_dither: convert the planar YUV picture p_src into the 16-bit
 * picture p_dest, using four 4-entry dither tables.
 *
 * p_filter provides the input and output sizes (fmt_in / fmt_out), the
 * output red shift used to scale the dither values, and the private state:
 * the p_rgb16 lookup table, the line conversion buffer (p_buffer) and the
 * horizontal offset array (p_offset).
 *
 * NOTE(review): CONVERT_YUV_PIXEL_DITHER, CONVERT_Y_PIXEL_DITHER, SCALE_WIDTH
 * and SCALE_HEIGHT are macros defined outside this view. They appear to read
 * and update the locals declared below by name (several locals are never
 * referenced directly in this function), so the names must stay as they are.
 */
63 void I420_RGB16_dither( filter_t *p_filter, picture_t *p_src,
64 picture_t *p_dest )
66 /* We got this one from the old arguments */
67 uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
68 uint8_t *p_y = p_src->Y_PIXELS;
69 uint8_t *p_u = p_src->U_PIXELS;
70 uint8_t *p_v = p_src->V_PIXELS;
72 bool b_hscale; /* horizontal scaling type */
73 unsigned int i_vscale; /* vertical scaling type */
74 unsigned int i_x, i_y; /* horizontal and vertical indexes */
75 unsigned int i_real_y; /* y % 4 */
77 int i_right_margin;
78 int i_rewind;
79 int i_scale_count; /* scale modulo counter */
80 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
81 uint16_t * p_pic_start; /* beginning of the current line for copy */
82 int i_uval, i_vval; /* U and V samples */
83 int i_red, i_green, i_blue; /* U and V modified samples */
84 uint16_t * p_yuv = p_filter->p_sys->p_rgb16;
85 uint16_t * p_ybase; /* Y dependent conversion table */
87 /* Conversion buffer pointer */
88 uint16_t * p_buffer_start = (uint16_t*)p_filter->p_sys->p_buffer;
89 uint16_t * p_buffer;
91 /* Offset array pointer */
92 int * p_offset_start = p_filter->p_sys->p_offset;
93 int * p_offset;
/* Padding bytes at the end of each source line (pitch minus visible pitch),
 * for the luma plane and for the first chroma plane */
95 const int i_source_margin = p_src->p[0].i_pitch
96 - p_src->p[0].i_visible_pitch;
97 const int i_source_margin_c = p_src->p[1].i_pitch
98 - p_src->p[1].i_visible_pitch;
100 /* The dithering matrices */
101 int dither10[4] = { 0x0, 0x8, 0x2, 0xa };
102 int dither11[4] = { 0xc, 0x4, 0xe, 0x6 };
103 int dither12[4] = { 0x3, 0xb, 0x1, 0x9 };
104 int dither13[4] = { 0xf, 0x7, 0xd, 0x5 };
/* Pre-shift every dither entry left by (SHIFT - 4 + i_rrshift) once, before
 * the conversion loop; SHIFT is defined outside this view */
106 for(i_x = 0; i_x < 4; i_x++)
108 dither10[i_x] = dither10[i_x] << (SHIFT - 4 + p_filter->fmt_out.video.i_rrshift);
109 dither11[i_x] = dither11[i_x] << (SHIFT - 4 + p_filter->fmt_out.video.i_rrshift);
110 dither12[i_x] = dither12[i_x] << (SHIFT - 4 + p_filter->fmt_out.video.i_rrshift);
111 dither13[i_x] = dither13[i_x] << (SHIFT - 4 + p_filter->fmt_out.video.i_rrshift);
/* Padding bytes at the end of each destination line */
114 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
/* Number of pixels to step back so that one last group of 8 pixels ends
 * exactly at the end of the line; 0 when the width is a multiple of 8 */
115 i_rewind = (-p_filter->fmt_in.video.i_width) & 7;
117 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
118 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
119 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
120 SetOffset( p_filter->fmt_in.video.i_width,
121 p_filter->fmt_in.video.i_height,
122 p_filter->fmt_out.video.i_width,
123 p_filter->fmt_out.video.i_height,
124 &b_hscale, &i_vscale, p_offset_start );
127 * Perform conversion
/* The counter starts at the output height when i_vscale is 1 and at the
 * input height otherwise; presumably consumed by SCALE_HEIGHT - confirm */
129 i_scale_count = ( i_vscale == 1 ) ?
130 p_filter->fmt_out.video.i_height :
131 p_filter->fmt_in.video.i_height;
132 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
/* Row index modulo 4; presumably used by the *_DITHER macros to pick an
 * entry of the table p_dither points to - confirm in the macro header */
134 i_real_y = i_y & 0x3;
135 p_pic_start = p_pic;
/* Convert into the intermediate buffer when the line must be scaled
 * horizontally, otherwise straight into the destination line */
136 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* One iteration per complete group of 8 pixels; p_dither cycles through
 * dither10..dither13 twice over the eight macro invocations */
138 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
140 int *p_dither = dither10;
141 CONVERT_YUV_PIXEL_DITHER(2);
142 p_dither = dither11;
143 CONVERT_Y_PIXEL_DITHER(2);
144 p_dither = dither12;
145 CONVERT_YUV_PIXEL_DITHER(2);
146 p_dither = dither13;
147 CONVERT_Y_PIXEL_DITHER(2);
148 p_dither = dither10;
149 CONVERT_YUV_PIXEL_DITHER(2);
150 p_dither = dither11;
151 CONVERT_Y_PIXEL_DITHER(2);
152 p_dither = dither12;
153 CONVERT_YUV_PIXEL_DITHER(2);
154 p_dither = dither13;
155 CONVERT_Y_PIXEL_DITHER(2);
158 /* Here we do some unaligned reads and duplicate conversions, but
159 * at least we have all the pixels */
160 if( i_rewind )
162 int *p_dither = dither10;
/* Step all pointers back (chroma by half as much) and convert one more
 * group of 8 pixels that overlaps the previous one */
163 p_y -= i_rewind;
164 p_u -= i_rewind >> 1;
165 p_v -= i_rewind >> 1;
166 p_buffer -= i_rewind;
167 CONVERT_YUV_PIXEL_DITHER(2);
168 p_dither = dither11;
169 CONVERT_Y_PIXEL_DITHER(2);
170 p_dither = dither12;
171 CONVERT_YUV_PIXEL_DITHER(2);
172 p_dither = dither13;
173 CONVERT_Y_PIXEL_DITHER(2);
174 p_dither = dither10;
175 CONVERT_YUV_PIXEL_DITHER(2);
176 p_dither = dither11;
177 CONVERT_Y_PIXEL_DITHER(2);
178 p_dither = dither12;
179 CONVERT_YUV_PIXEL_DITHER(2);
180 p_dither = dither13;
181 CONVERT_Y_PIXEL_DITHER(2);
183 SCALE_WIDTH;
184 SCALE_HEIGHT( 420, 2 );
/* Skip the luma padding after every line, the chroma padding only after
 * odd lines (i_y % 2) */
186 p_y += i_source_margin;
187 if( i_y % 2 )
189 p_u += i_source_margin_c;
190 p_v += i_source_margin_c;
194 #endif
196 /*****************************************************************************
197 * I420_RGB16: color YUV 4:2:0 to RGB 16 bpp
198 *****************************************************************************
199 * Horizontal alignment needed:
200 * - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
201 * - output: 1 pixel (2 bytes), margins allowed
202 * Vertical alignment needed:
203 * - input: 2 lines (2 Y lines, 1 U/V line)
204 * - output: 1 line
205 *****************************************************************************/
207 #if defined (MODULE_NAME_IS_i420_rgb)
/*
 * I420_RGB16: convert the planar YUV picture p_src into the 16-bit picture
 * p_dest through the p_rgb16 lookup table held in p_filter->p_sys.
 *
 * Each source line is converted in groups of 8 pixels, either directly into
 * the destination or into the intermediate buffer when horizontal scaling
 * is requested by SetOffset().
 *
 * NOTE(review): CONVERT_YUV_PIXEL, CONVERT_Y_PIXEL, SCALE_WIDTH and
 * SCALE_HEIGHT are macros defined outside this view. They appear to use the
 * locals declared below by name, so the names must stay as they are.
 */
209 void I420_RGB16( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
211 /* We got this one from the old arguments */
212 uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
213 uint8_t *p_y = p_src->Y_PIXELS;
214 uint8_t *p_u = p_src->U_PIXELS;
215 uint8_t *p_v = p_src->V_PIXELS;
217 bool b_hscale; /* horizontal scaling type */
218 unsigned int i_vscale; /* vertical scaling type */
219 unsigned int i_x, i_y; /* horizontal and vertical indexes */
221 int i_right_margin;
222 int i_rewind;
223 int i_scale_count; /* scale modulo counter */
224 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
225 uint16_t * p_pic_start; /* beginning of the current line for copy */
226 int i_uval, i_vval; /* U and V samples */
227 int i_red, i_green, i_blue; /* U and V modified samples */
228 uint16_t * p_yuv = p_filter->p_sys->p_rgb16;
229 uint16_t * p_ybase; /* Y dependent conversion table */
231 /* Conversion buffer pointer */
232 uint16_t * p_buffer_start = (uint16_t*)p_filter->p_sys->p_buffer;
233 uint16_t * p_buffer;
235 /* Offset array pointer */
236 int * p_offset_start = p_filter->p_sys->p_offset;
237 int * p_offset;
/* Padding bytes at the end of each source line (pitch minus visible pitch),
 * for the luma plane and for the first chroma plane */
239 const int i_source_margin = p_src->p[0].i_pitch
240 - p_src->p[0].i_visible_pitch;
241 const int i_source_margin_c = p_src->p[1].i_pitch
242 - p_src->p[1].i_visible_pitch;
/* Padding bytes at the end of each destination line */
244 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
/* Number of pixels to step back so that one last group of 8 pixels ends
 * exactly at the end of the line; 0 when the width is a multiple of 8 */
245 i_rewind = (-p_filter->fmt_in.video.i_width) & 7;
247 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
248 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
249 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
250 SetOffset( p_filter->fmt_in.video.i_width,
251 p_filter->fmt_in.video.i_height,
252 p_filter->fmt_out.video.i_width,
253 p_filter->fmt_out.video.i_height,
254 &b_hscale, &i_vscale, p_offset_start );
257 * Perform conversion
/* The counter starts at the output height when i_vscale is 1 and at the
 * input height otherwise; presumably consumed by SCALE_HEIGHT - confirm */
259 i_scale_count = ( i_vscale == 1 ) ?
260 p_filter->fmt_out.video.i_height :
261 p_filter->fmt_in.video.i_height;
262 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
264 p_pic_start = p_pic;
/* Convert into the intermediate buffer when the line must be scaled
 * horizontally, otherwise straight into the destination line */
265 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* One iteration per complete group of 8 pixels (eight macro invocations) */
267 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
269 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
270 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
271 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
272 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
275 /* Here we do some unaligned reads and duplicate conversions, but
276 * at least we have all the pixels */
277 if( i_rewind )
/* Step all pointers back (chroma by half as much) and convert one more
 * group of 8 pixels that overlaps the previous one */
279 p_y -= i_rewind;
280 p_u -= i_rewind >> 1;
281 p_v -= i_rewind >> 1;
282 p_buffer -= i_rewind;
284 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
285 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
286 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
287 CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2);
289 SCALE_WIDTH;
290 SCALE_HEIGHT( 420, 2 );
/* Skip the luma padding after every line, the chroma padding only after
 * odd lines (i_y % 2) */
292 p_y += i_source_margin;
293 if( i_y % 2 )
295 p_u += i_source_margin_c;
296 p_v += i_source_margin_c;
301 #else // ! defined (MODULE_NAME_IS_i420_rgb)
/*
 * I420_R5G5B5: convert the planar YUV picture p_src into the 16-bit picture
 * p_dest with the MMX or SSE2 macro blocks, selected at build time by the
 * module name. The pixel packing is done by the *_UNPACK_15 macros.
 *
 * The SSE2 build handles 16 pixels per group and has two loops: one using
 * the *_ALIGNED macros when pitches and start addresses are multiples of 16,
 * one using the *_UNALIGNED macros otherwise. The MMX build handles 8 pixels
 * per group.
 *
 * NOTE(review): the SSE2_*, MMX_*, SCALE_WIDTH and SCALE_HEIGHT macros are
 * defined outside this view. They appear to use the locals declared below by
 * name, so the names must stay as they are.
 */
303 VLC_TARGET
304 void I420_R5G5B5( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
306 /* We got this one from the old arguments */
307 uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
308 uint8_t *p_y = p_src->Y_PIXELS;
309 uint8_t *p_u = p_src->U_PIXELS;
310 uint8_t *p_v = p_src->V_PIXELS;
312 bool b_hscale; /* horizontal scaling type */
313 unsigned int i_vscale; /* vertical scaling type */
314 unsigned int i_x, i_y; /* horizontal and vertical indexes */
316 int i_right_margin;
317 int i_rewind;
318 int i_scale_count; /* scale modulo counter */
319 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
320 uint16_t * p_pic_start; /* beginning of the current line for copy */
322 /* Conversion buffer pointer */
323 uint16_t * p_buffer_start = (uint16_t*)p_filter->p_sys->p_buffer;
324 uint16_t * p_buffer;
326 /* Offset array pointer */
327 int * p_offset_start = p_filter->p_sys->p_offset;
328 int * p_offset;
/* Padding bytes at the end of each source line (pitch minus visible pitch),
 * for the luma plane and for the first chroma plane */
330 const int i_source_margin = p_src->p[0].i_pitch
331 - p_src->p[0].i_visible_pitch;
332 const int i_source_margin_c = p_src->p[1].i_pitch
333 - p_src->p[1].i_visible_pitch;
/* Padding bytes at the end of each destination line */
335 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
337 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
338 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
339 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
340 SetOffset( p_filter->fmt_in.video.i_width,
341 p_filter->fmt_in.video.i_height,
342 p_filter->fmt_out.video.i_width,
343 p_filter->fmt_out.video.i_height,
344 &b_hscale, &i_vscale, p_offset_start );
348 * Perform conversion
/* The counter starts at the output height when i_vscale is 1 and at the
 * input height otherwise; presumably consumed by SCALE_HEIGHT - confirm */
350 i_scale_count = ( i_vscale == 1 ) ?
351 p_filter->fmt_out.video.i_height :
352 p_filter->fmt_in.video.i_height;
354 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
/* Number of pixels to step back so that one last group of 16 pixels ends
 * exactly at the end of the line; 0 when the width is a multiple of 16 */
356 i_rewind = (-p_filter->fmt_in.video.i_width) & 15;
359 ** SSE2 128 bits fetch/store instructions are faster
360 ** if memory access is 16 bytes aligned
363 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* The aligned loop is taken only when the luma pitch, the destination
 * pitch, the luma start address and the output address are all
 * multiples of 16 */
364 if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
365 p_dest->p->i_pitch|
366 ((intptr_t)p_y)|
367 ((intptr_t)p_buffer))) )
369 /* use faster SSE2 aligned fetch and store */
370 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
372 p_pic_start = p_pic;
/* One iteration per complete group of 16 pixels (16 luma bytes and
 * 8 bytes of each chroma plane) */
374 for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
376 SSE2_CALL (
377 SSE2_INIT_16_ALIGNED
378 SSE2_YUV_MUL
379 SSE2_YUV_ADD
380 SSE2_UNPACK_15_ALIGNED
382 p_y += 16;
383 p_u += 8;
384 p_v += 8;
385 p_buffer += 16;
387 /* Here we do some unaligned reads and duplicate conversions, but
388 * at least we have all the pixels */
389 if( i_rewind )
/* After stepping back by i_rewind the pointers are no longer on a
 * 16-byte boundary, hence the *_UNALIGNED macros in this block */
391 p_y -= i_rewind;
392 p_u -= i_rewind >> 1;
393 p_v -= i_rewind >> 1;
394 p_buffer -= i_rewind;
396 SSE2_CALL (
397 SSE2_INIT_16_UNALIGNED
398 SSE2_YUV_MUL
399 SSE2_YUV_ADD
400 SSE2_UNPACK_15_UNALIGNED
402 p_y += 16;
403 p_u += 8;
404 p_v += 8;
406 SCALE_WIDTH;
407 SCALE_HEIGHT( 420, 2 );
/* Skip the luma padding after every line, the chroma padding only after
 * odd lines (i_y % 2) */
409 p_y += i_source_margin;
410 if( i_y % 2 )
412 p_u += i_source_margin_c;
413 p_v += i_source_margin_c;
/* Output pointer for the next line (set at the end of the iteration here,
 * because it was already needed for the alignment test before the loop) */
415 p_buffer = b_hscale ? p_buffer_start : p_pic;
418 else
420 /* use slower SSE2 unaligned fetch and store */
421 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
423 p_pic_start = p_pic;
424 p_buffer = b_hscale ? p_buffer_start : p_pic;
426 for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
428 SSE2_CALL (
429 SSE2_INIT_16_UNALIGNED
430 SSE2_YUV_MUL
431 SSE2_YUV_ADD
432 SSE2_UNPACK_15_UNALIGNED
434 p_y += 16;
435 p_u += 8;
436 p_v += 8;
437 p_buffer += 16;
439 /* Here we do some unaligned reads and duplicate conversions, but
440 * at least we have all the pixels */
441 if( i_rewind )
443 p_y -= i_rewind;
444 p_u -= i_rewind >> 1;
445 p_v -= i_rewind >> 1;
446 p_buffer -= i_rewind;
448 SSE2_CALL (
449 SSE2_INIT_16_UNALIGNED
450 SSE2_YUV_MUL
451 SSE2_YUV_ADD
452 SSE2_UNPACK_15_UNALIGNED
454 p_y += 16;
455 p_u += 8;
456 p_v += 8;
458 SCALE_WIDTH;
459 SCALE_HEIGHT( 420, 2 );
461 p_y += i_source_margin;
462 if( i_y % 2 )
464 p_u += i_source_margin_c;
465 p_v += i_source_margin_c;
467 p_buffer = b_hscale ? p_buffer_start : p_pic;
471 /* make sure all SSE2 stores are visible thereafter */
472 SSE2_END;
474 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
/* Number of pixels to step back so that one last group of 8 pixels ends
 * exactly at the end of the line; 0 when the width is a multiple of 8 */
476 i_rewind = (-p_filter->fmt_in.video.i_width) & 7;
478 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
480 p_pic_start = p_pic;
481 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* One iteration per complete group of 8 pixels (8 luma bytes and 4 bytes
 * of each chroma plane) */
483 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
485 MMX_CALL (
486 MMX_INIT_16
487 MMX_YUV_MUL
488 MMX_YUV_ADD
489 MMX_UNPACK_15
491 p_y += 8;
492 p_u += 4;
493 p_v += 4;
494 p_buffer += 8;
497 /* Here we do some unaligned reads and duplicate conversions, but
498 * at least we have all the pixels */
499 if( i_rewind )
501 p_y -= i_rewind;
502 p_u -= i_rewind >> 1;
503 p_v -= i_rewind >> 1;
504 p_buffer -= i_rewind;
506 MMX_CALL (
507 MMX_INIT_16
508 MMX_YUV_MUL
509 MMX_YUV_ADD
510 MMX_UNPACK_15
512 p_y += 8;
513 p_u += 4;
514 p_v += 4;
515 p_buffer += 8;
517 SCALE_WIDTH;
518 SCALE_HEIGHT( 420, 2 );
520 p_y += i_source_margin;
521 if( i_y % 2 )
523 p_u += i_source_margin_c;
524 p_v += i_source_margin_c;
527 /* re-enable FPU registers */
528 MMX_END;
530 #endif
/*
 * I420_R5G6B5: convert the planar YUV picture p_src into the 16-bit picture
 * p_dest with the MMX or SSE2 macro blocks, selected at build time by the
 * module name. The pixel packing is done by the *_UNPACK_16 macros.
 *
 * The SSE2 build handles 16 pixels per group and has two loops: one using
 * the *_ALIGNED macros when pitches and start addresses are multiples of 16,
 * one using the *_UNALIGNED macros otherwise. The MMX build handles 8 pixels
 * per group.
 *
 * NOTE(review): the SSE2_*, MMX_*, SCALE_WIDTH and SCALE_HEIGHT macros are
 * defined outside this view. They appear to use the locals declared below by
 * name, so the names must stay as they are.
 */
533 VLC_TARGET
534 void I420_R5G6B5( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
536 /* We got this one from the old arguments */
537 uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
538 uint8_t *p_y = p_src->Y_PIXELS;
539 uint8_t *p_u = p_src->U_PIXELS;
540 uint8_t *p_v = p_src->V_PIXELS;
542 bool b_hscale; /* horizontal scaling type */
543 unsigned int i_vscale; /* vertical scaling type */
544 unsigned int i_x, i_y; /* horizontal and vertical indexes */
546 int i_right_margin;
547 int i_rewind;
548 int i_scale_count; /* scale modulo counter */
549 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
550 uint16_t * p_pic_start; /* beginning of the current line for copy */
552 /* Conversion buffer pointer */
553 uint16_t * p_buffer_start = (uint16_t*)p_filter->p_sys->p_buffer;
554 uint16_t * p_buffer;
556 /* Offset array pointer */
557 int * p_offset_start = p_filter->p_sys->p_offset;
558 int * p_offset;
/* Padding bytes at the end of each source line (pitch minus visible pitch),
 * for the luma plane and for the first chroma plane */
560 const int i_source_margin = p_src->p[0].i_pitch
561 - p_src->p[0].i_visible_pitch;
562 const int i_source_margin_c = p_src->p[1].i_pitch
563 - p_src->p[1].i_visible_pitch;
/* Padding bytes at the end of each destination line */
565 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
567 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
568 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
569 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
570 SetOffset( p_filter->fmt_in.video.i_width,
571 p_filter->fmt_in.video.i_height,
572 p_filter->fmt_out.video.i_width,
573 p_filter->fmt_out.video.i_height,
574 &b_hscale, &i_vscale, p_offset_start );
578 * Perform conversion
/* The counter starts at the output height when i_vscale is 1 and at the
 * input height otherwise; presumably consumed by SCALE_HEIGHT - confirm */
580 i_scale_count = ( i_vscale == 1 ) ?
581 p_filter->fmt_out.video.i_height :
582 p_filter->fmt_in.video.i_height;
584 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
/* Number of pixels to step back so that one last group of 16 pixels ends
 * exactly at the end of the line; 0 when the width is a multiple of 16 */
586 i_rewind = (-p_filter->fmt_in.video.i_width) & 15;
589 ** SSE2 128 bits fetch/store instructions are faster
590 ** if memory access is 16 bytes aligned
593 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* The aligned loop is taken only when the luma pitch, the destination
 * pitch, the luma start address and the output address are all
 * multiples of 16 */
594 if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
595 p_dest->p->i_pitch|
596 ((intptr_t)p_y)|
597 ((intptr_t)p_buffer))) )
599 /* use faster SSE2 aligned fetch and store */
600 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
602 p_pic_start = p_pic;
/* One iteration per complete group of 16 pixels (16 luma bytes and
 * 8 bytes of each chroma plane) */
604 for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
606 SSE2_CALL (
607 SSE2_INIT_16_ALIGNED
608 SSE2_YUV_MUL
609 SSE2_YUV_ADD
610 SSE2_UNPACK_16_ALIGNED
612 p_y += 16;
613 p_u += 8;
614 p_v += 8;
615 p_buffer += 16;
617 /* Here we do some unaligned reads and duplicate conversions, but
618 * at least we have all the pixels */
619 if( i_rewind )
/* After stepping back by i_rewind the pointers are no longer on a
 * 16-byte boundary, hence the *_UNALIGNED macros in this block */
621 p_y -= i_rewind;
622 p_u -= i_rewind >> 1;
623 p_v -= i_rewind >> 1;
624 p_buffer -= i_rewind;
626 SSE2_CALL (
627 SSE2_INIT_16_UNALIGNED
628 SSE2_YUV_MUL
629 SSE2_YUV_ADD
630 SSE2_UNPACK_16_UNALIGNED
632 p_y += 16;
633 p_u += 8;
634 p_v += 8;
636 SCALE_WIDTH;
637 SCALE_HEIGHT( 420, 2 );
/* Skip the luma padding after every line, the chroma padding only after
 * odd lines (i_y % 2) */
639 p_y += i_source_margin;
640 if( i_y % 2 )
642 p_u += i_source_margin_c;
643 p_v += i_source_margin_c;
/* Output pointer for the next line (set at the end of the iteration here,
 * because it was already needed for the alignment test before the loop) */
645 p_buffer = b_hscale ? p_buffer_start : p_pic;
648 else
650 /* use slower SSE2 unaligned fetch and store */
651 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
653 p_pic_start = p_pic;
654 p_buffer = b_hscale ? p_buffer_start : p_pic;
656 for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
658 SSE2_CALL(
659 SSE2_INIT_16_UNALIGNED
660 SSE2_YUV_MUL
661 SSE2_YUV_ADD
662 SSE2_UNPACK_16_UNALIGNED
664 p_y += 16;
665 p_u += 8;
666 p_v += 8;
667 p_buffer += 16;
669 /* Here we do some unaligned reads and duplicate conversions, but
670 * at least we have all the pixels */
671 if( i_rewind )
673 p_y -= i_rewind;
674 p_u -= i_rewind >> 1;
675 p_v -= i_rewind >> 1;
676 p_buffer -= i_rewind;
678 SSE2_CALL(
679 SSE2_INIT_16_UNALIGNED
680 SSE2_YUV_MUL
681 SSE2_YUV_ADD
682 SSE2_UNPACK_16_UNALIGNED
684 p_y += 16;
685 p_u += 8;
686 p_v += 8;
688 SCALE_WIDTH;
689 SCALE_HEIGHT( 420, 2 );
691 p_y += i_source_margin;
692 if( i_y % 2 )
694 p_u += i_source_margin_c;
695 p_v += i_source_margin_c;
697 p_buffer = b_hscale ? p_buffer_start : p_pic;
701 /* make sure all SSE2 stores are visible thereafter */
702 SSE2_END;
704 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
/* Number of pixels to step back so that one last group of 8 pixels ends
 * exactly at the end of the line; 0 when the width is a multiple of 8 */
706 i_rewind = (-p_filter->fmt_in.video.i_width) & 7;
708 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
710 p_pic_start = p_pic;
711 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* One iteration per complete group of 8 pixels (8 luma bytes and 4 bytes
 * of each chroma plane) */
713 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
715 MMX_CALL (
716 MMX_INIT_16
717 MMX_YUV_MUL
718 MMX_YUV_ADD
719 MMX_UNPACK_16
721 p_y += 8;
722 p_u += 4;
723 p_v += 4;
724 p_buffer += 8;
727 /* Here we do some unaligned reads and duplicate conversions, but
728 * at least we have all the pixels */
729 if( i_rewind )
731 p_y -= i_rewind;
732 p_u -= i_rewind >> 1;
733 p_v -= i_rewind >> 1;
734 p_buffer -= i_rewind;
736 MMX_CALL (
737 MMX_INIT_16
738 MMX_YUV_MUL
739 MMX_YUV_ADD
740 MMX_UNPACK_16
742 p_y += 8;
743 p_u += 4;
744 p_v += 4;
745 p_buffer += 8;
747 SCALE_WIDTH;
748 SCALE_HEIGHT( 420, 2 );
750 p_y += i_source_margin;
751 if( i_y % 2 )
753 p_u += i_source_margin_c;
754 p_v += i_source_margin_c;
757 /* re-enable FPU registers */
758 MMX_END;
760 #endif
763 #endif
765 /*****************************************************************************
766 * I420_RGB32: color YUV 4:2:0 to RGB 32 bpp
767 *****************************************************************************
768 * Horizontal alignment needed:
769 * - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
770 * - output: 1 pixel (4 bytes), margins allowed
771 * Vertical alignment needed:
772 * - input: 2 lines (2 Y lines, 1 U/V line)
773 * - output: 1 line
774 *****************************************************************************/
776 #if defined (MODULE_NAME_IS_i420_rgb)
/*
 * I420_RGB32: convert the planar YUV picture p_src into the 32-bit picture
 * p_dest through the p_rgb32 lookup table held in p_filter->p_sys.
 *
 * Each source line is converted in groups of 8 pixels, either directly into
 * the destination or into the intermediate buffer when horizontal scaling
 * is requested by SetOffset().
 *
 * NOTE(review): CONVERT_YUV_PIXEL, CONVERT_Y_PIXEL, SCALE_WIDTH and
 * SCALE_HEIGHT are macros defined outside this view. They appear to use the
 * locals declared below by name, so the names must stay as they are.
 */
778 void I420_RGB32( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
780 /* We got this one from the old arguments */
781 uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
782 uint8_t *p_y = p_src->Y_PIXELS;
783 uint8_t *p_u = p_src->U_PIXELS;
784 uint8_t *p_v = p_src->V_PIXELS;
786 bool b_hscale; /* horizontal scaling type */
787 unsigned int i_vscale; /* vertical scaling type */
788 unsigned int i_x, i_y; /* horizontal and vertical indexes */
790 int i_right_margin;
791 int i_rewind;
792 int i_scale_count; /* scale modulo counter */
793 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
794 uint32_t * p_pic_start; /* beginning of the current line for copy */
795 int i_uval, i_vval; /* U and V samples */
796 int i_red, i_green, i_blue; /* U and V modified samples */
797 uint32_t * p_yuv = p_filter->p_sys->p_rgb32;
798 uint32_t * p_ybase; /* Y dependent conversion table */
800 /* Conversion buffer pointer */
801 uint32_t * p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
802 uint32_t * p_buffer;
804 /* Offset array pointer */
805 int * p_offset_start = p_filter->p_sys->p_offset;
806 int * p_offset;
/* Padding bytes at the end of each source line (pitch minus visible pitch),
 * for the luma plane and for the first chroma plane */
808 const int i_source_margin = p_src->p[0].i_pitch
809 - p_src->p[0].i_visible_pitch;
810 const int i_source_margin_c = p_src->p[1].i_pitch
811 - p_src->p[1].i_visible_pitch;
/* Padding bytes at the end of each destination line */
813 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
/* Number of pixels to step back so that one last group of 8 pixels ends
 * exactly at the end of the line; 0 when the width is a multiple of 8 */
814 i_rewind = (-p_filter->fmt_in.video.i_width) & 7;
816 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
817 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
818 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
819 SetOffset( p_filter->fmt_in.video.i_width,
820 p_filter->fmt_in.video.i_height,
821 p_filter->fmt_out.video.i_width,
822 p_filter->fmt_out.video.i_height,
823 &b_hscale, &i_vscale, p_offset_start );
826 * Perform conversion
/* The counter starts at the output height when i_vscale is 1 and at the
 * input height otherwise; presumably consumed by SCALE_HEIGHT - confirm */
828 i_scale_count = ( i_vscale == 1 ) ?
829 p_filter->fmt_out.video.i_height :
830 p_filter->fmt_in.video.i_height;
831 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
833 p_pic_start = p_pic;
/* Convert into the intermediate buffer when the line must be scaled
 * horizontally, otherwise straight into the destination line */
834 p_buffer = b_hscale ? p_buffer_start : p_pic;
/* One iteration per complete group of 8 pixels (eight macro invocations) */
836 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
838 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
839 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
840 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
841 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
844 /* Here we do some unaligned reads and duplicate conversions, but
845 * at least we have all the pixels */
846 if( i_rewind )
/* Step all pointers back (chroma by half as much) and convert one more
 * group of 8 pixels that overlaps the previous one */
848 p_y -= i_rewind;
849 p_u -= i_rewind >> 1;
850 p_v -= i_rewind >> 1;
851 p_buffer -= i_rewind;
852 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
853 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
854 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
855 CONVERT_YUV_PIXEL(4); CONVERT_Y_PIXEL(4);
857 SCALE_WIDTH;
858 SCALE_HEIGHT( 420, 4 );
/* Skip the luma padding after every line, the chroma padding only after
 * odd lines (i_y % 2) */
860 p_y += i_source_margin;
861 if( i_y % 2 )
863 p_u += i_source_margin_c;
864 p_v += i_source_margin_c;
869 #else // defined (MODULE_NAME_IS_i420_rgb_mmx) || defined (MODULE_NAME_IS_i420_rgb_sse2)
871 VLC_TARGET
872 void I420_A8R8G8B8( filter_t *p_filter, picture_t *p_src,
873 picture_t *p_dest )
875 /* We got this one from the old arguments */
876 uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
877 uint8_t *p_y = p_src->Y_PIXELS;
878 uint8_t *p_u = p_src->U_PIXELS;
879 uint8_t *p_v = p_src->V_PIXELS;
881 bool b_hscale; /* horizontal scaling type */
882 unsigned int i_vscale; /* vertical scaling type */
883 unsigned int i_x, i_y; /* horizontal and vertical indexes */
885 int i_right_margin;
886 int i_rewind;
887 int i_scale_count; /* scale modulo counter */
888 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
889 uint32_t * p_pic_start; /* beginning of the current line for copy */
890 /* Conversion buffer pointer */
891 uint32_t * p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
892 uint32_t * p_buffer;
894 /* Offset array pointer */
895 int * p_offset_start = p_filter->p_sys->p_offset;
896 int * p_offset;
898 const int i_source_margin = p_src->p[0].i_pitch
899 - p_src->p[0].i_visible_pitch;
900 const int i_source_margin_c = p_src->p[1].i_pitch
901 - p_src->p[1].i_visible_pitch;
903 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
905 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
906 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
907 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
908 SetOffset( p_filter->fmt_in.video.i_width,
909 p_filter->fmt_in.video.i_height,
910 p_filter->fmt_out.video.i_width,
911 p_filter->fmt_out.video.i_height,
912 &b_hscale, &i_vscale, p_offset_start );
915 * Perform conversion
917 i_scale_count = ( i_vscale == 1 ) ?
918 p_filter->fmt_out.video.i_height :
919 p_filter->fmt_in.video.i_height;
921 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
923 i_rewind = (-p_filter->fmt_in.video.i_width) & 15;
926 ** SSE2 128 bits fetch/store instructions are faster
927 ** if memory access is 16 bytes aligned
930 p_buffer = b_hscale ? p_buffer_start : p_pic;
931 if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
932 p_dest->p->i_pitch|
933 ((intptr_t)p_y)|
934 ((intptr_t)p_buffer))) )
936 /* use faster SSE2 aligned fetch and store */
937 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
939 p_pic_start = p_pic;
941 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
943 SSE2_CALL (
944 SSE2_INIT_32_ALIGNED
945 SSE2_YUV_MUL
946 SSE2_YUV_ADD
947 SSE2_UNPACK_32_ARGB_ALIGNED
949 p_y += 16;
950 p_u += 8;
951 p_v += 8;
952 p_buffer += 16;
955 /* Here we do some unaligned reads and duplicate conversions, but
956 * at least we have all the pixels */
957 if( i_rewind )
959 p_y -= i_rewind;
960 p_u -= i_rewind >> 1;
961 p_v -= i_rewind >> 1;
962 p_buffer -= i_rewind;
963 SSE2_CALL (
964 SSE2_INIT_32_UNALIGNED
965 SSE2_YUV_MUL
966 SSE2_YUV_ADD
967 SSE2_UNPACK_32_ARGB_UNALIGNED
969 p_y += 16;
970 p_u += 4;
971 p_v += 4;
973 SCALE_WIDTH;
974 SCALE_HEIGHT( 420, 4 );
976 p_y += i_source_margin;
977 if( i_y % 2 )
979 p_u += i_source_margin_c;
980 p_v += i_source_margin_c;
982 p_buffer = b_hscale ? p_buffer_start : p_pic;
985 else
987 /* use slower SSE2 unaligned fetch and store */
988 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
990 p_pic_start = p_pic;
991 p_buffer = b_hscale ? p_buffer_start : p_pic;
993 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
995 SSE2_CALL (
996 SSE2_INIT_32_UNALIGNED
997 SSE2_YUV_MUL
998 SSE2_YUV_ADD
999 SSE2_UNPACK_32_ARGB_UNALIGNED
1001 p_y += 16;
1002 p_u += 8;
1003 p_v += 8;
1004 p_buffer += 16;
1007 /* Here we do some unaligned reads and duplicate conversions, but
1008 * at least we have all the pixels */
1009 if( i_rewind )
1011 p_y -= i_rewind;
1012 p_u -= i_rewind >> 1;
1013 p_v -= i_rewind >> 1;
1014 p_buffer -= i_rewind;
1015 SSE2_CALL (
1016 SSE2_INIT_32_UNALIGNED
1017 SSE2_YUV_MUL
1018 SSE2_YUV_ADD
1019 SSE2_UNPACK_32_ARGB_UNALIGNED
1021 p_y += 16;
1022 p_u += 8;
1023 p_v += 8;
1025 SCALE_WIDTH;
1026 SCALE_HEIGHT( 420, 4 );
1028 p_y += i_source_margin;
1029 if( i_y % 2 )
1031 p_u += i_source_margin_c;
1032 p_v += i_source_margin_c;
1034 p_buffer = b_hscale ? p_buffer_start : p_pic;
1038 /* make sure all SSE2 stores are visible thereafter */
1039 SSE2_END;
1041 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
1043 i_rewind = (-p_filter->fmt_in.video.i_width) & 7;
1045 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1047 p_pic_start = p_pic;
1048 p_buffer = b_hscale ? p_buffer_start : p_pic;
1050 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
1052 MMX_CALL (
1053 MMX_INIT_32
1054 MMX_YUV_MUL
1055 MMX_YUV_ADD
1056 MMX_UNPACK_32_ARGB
1058 p_y += 8;
1059 p_u += 4;
1060 p_v += 4;
1061 p_buffer += 8;
1064 /* Here we do some unaligned reads and duplicate conversions, but
1065 * at least we have all the pixels */
1066 if( i_rewind )
1068 p_y -= i_rewind;
1069 p_u -= i_rewind >> 1;
1070 p_v -= i_rewind >> 1;
1071 p_buffer -= i_rewind;
1072 MMX_CALL (
1073 MMX_INIT_32
1074 MMX_YUV_MUL
1075 MMX_YUV_ADD
1076 MMX_UNPACK_32_ARGB
1078 p_y += 8;
1079 p_u += 4;
1080 p_v += 4;
1081 p_buffer += 8;
1083 SCALE_WIDTH;
1084 SCALE_HEIGHT( 420, 4 );
1086 p_y += i_source_margin;
1087 if( i_y % 2 )
1089 p_u += i_source_margin_c;
1090 p_v += i_source_margin_c;
1094 /* re-enable FPU registers */
1095 MMX_END;
1097 #endif
1100 VLC_TARGET
1101 void I420_R8G8B8A8( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
1103 /* We got this one from the old arguments */
1104 uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
1105 uint8_t *p_y = p_src->Y_PIXELS;
1106 uint8_t *p_u = p_src->U_PIXELS;
1107 uint8_t *p_v = p_src->V_PIXELS;
1109 bool b_hscale; /* horizontal scaling type */
1110 unsigned int i_vscale; /* vertical scaling type */
1111 unsigned int i_x, i_y; /* horizontal and vertical indexes */
1113 int i_right_margin;
1114 int i_rewind;
1115 int i_scale_count; /* scale modulo counter */
1116 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
1117 uint32_t * p_pic_start; /* beginning of the current line for copy */
1118 /* Conversion buffer pointer */
1119 uint32_t * p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
1120 uint32_t * p_buffer;
1122 /* Offset array pointer */
1123 int * p_offset_start = p_filter->p_sys->p_offset;
1124 int * p_offset;
1126 const int i_source_margin = p_src->p[0].i_pitch
1127 - p_src->p[0].i_visible_pitch;
1128 const int i_source_margin_c = p_src->p[1].i_pitch
1129 - p_src->p[1].i_visible_pitch;
1131 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
1133 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
1134 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
1135 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
1136 SetOffset( p_filter->fmt_in.video.i_width,
1137 p_filter->fmt_in.video.i_height,
1138 p_filter->fmt_out.video.i_width,
1139 p_filter->fmt_out.video.i_height,
1140 &b_hscale, &i_vscale, p_offset_start );
1143 * Perform conversion
1145 i_scale_count = ( i_vscale == 1 ) ?
1146 p_filter->fmt_out.video.i_height :
1147 p_filter->fmt_in.video.i_height;
1149 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
1151 i_rewind = (-p_filter->fmt_in.video.i_width) & 15;
1154 ** SSE2 128 bits fetch/store instructions are faster
1155 ** if memory access is 16 bytes aligned
1158 p_buffer = b_hscale ? p_buffer_start : p_pic;
1159 if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
1160 p_dest->p->i_pitch|
1161 ((intptr_t)p_y)|
1162 ((intptr_t)p_buffer))) )
1164 /* use faster SSE2 aligned fetch and store */
1165 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1167 p_pic_start = p_pic;
1169 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
1171 SSE2_CALL (
1172 SSE2_INIT_32_ALIGNED
1173 SSE2_YUV_MUL
1174 SSE2_YUV_ADD
1175 SSE2_UNPACK_32_RGBA_ALIGNED
1177 p_y += 16;
1178 p_u += 8;
1179 p_v += 8;
1180 p_buffer += 16;
1183 /* Here we do some unaligned reads and duplicate conversions, but
1184 * at least we have all the pixels */
1185 if( i_rewind )
1187 p_y -= i_rewind;
1188 p_u -= i_rewind >> 1;
1189 p_v -= i_rewind >> 1;
1190 p_buffer -= i_rewind;
1191 SSE2_CALL (
1192 SSE2_INIT_32_UNALIGNED
1193 SSE2_YUV_MUL
1194 SSE2_YUV_ADD
1195 SSE2_UNPACK_32_RGBA_UNALIGNED
1197 p_y += 16;
1198 p_u += 4;
1199 p_v += 4;
1201 SCALE_WIDTH;
1202 SCALE_HEIGHT( 420, 4 );
1204 p_y += i_source_margin;
1205 if( i_y % 2 )
1207 p_u += i_source_margin_c;
1208 p_v += i_source_margin_c;
1210 p_buffer = b_hscale ? p_buffer_start : p_pic;
1213 else
1215 /* use slower SSE2 unaligned fetch and store */
1216 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1218 p_pic_start = p_pic;
1219 p_buffer = b_hscale ? p_buffer_start : p_pic;
1221 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
1223 SSE2_CALL (
1224 SSE2_INIT_32_UNALIGNED
1225 SSE2_YUV_MUL
1226 SSE2_YUV_ADD
1227 SSE2_UNPACK_32_RGBA_UNALIGNED
1229 p_y += 16;
1230 p_u += 8;
1231 p_v += 8;
1232 p_buffer += 16;
1235 /* Here we do some unaligned reads and duplicate conversions, but
1236 * at least we have all the pixels */
1237 if( i_rewind )
1239 p_y -= i_rewind;
1240 p_u -= i_rewind >> 1;
1241 p_v -= i_rewind >> 1;
1242 p_buffer -= i_rewind;
1243 SSE2_CALL (
1244 SSE2_INIT_32_UNALIGNED
1245 SSE2_YUV_MUL
1246 SSE2_YUV_ADD
1247 SSE2_UNPACK_32_RGBA_UNALIGNED
1249 p_y += 16;
1250 p_u += 8;
1251 p_v += 8;
1253 SCALE_WIDTH;
1254 SCALE_HEIGHT( 420, 4 );
1256 p_y += i_source_margin;
1257 if( i_y % 2 )
1259 p_u += i_source_margin_c;
1260 p_v += i_source_margin_c;
1262 p_buffer = b_hscale ? p_buffer_start : p_pic;
1266 /* make sure all SSE2 stores are visible thereafter */
1267 SSE2_END;
1269 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
1271 i_rewind = (-p_filter->fmt_in.video.i_width) & 7;
1273 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1275 p_pic_start = p_pic;
1276 p_buffer = b_hscale ? p_buffer_start : p_pic;
1278 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
1280 MMX_CALL (
1281 MMX_INIT_32
1282 MMX_YUV_MUL
1283 MMX_YUV_ADD
1284 MMX_UNPACK_32_RGBA
1286 p_y += 8;
1287 p_u += 4;
1288 p_v += 4;
1289 p_buffer += 8;
1292 /* Here we do some unaligned reads and duplicate conversions, but
1293 * at least we have all the pixels */
1294 if( i_rewind )
1296 p_y -= i_rewind;
1297 p_u -= i_rewind >> 1;
1298 p_v -= i_rewind >> 1;
1299 p_buffer -= i_rewind;
1300 MMX_CALL (
1301 MMX_INIT_32
1302 MMX_YUV_MUL
1303 MMX_YUV_ADD
1304 MMX_UNPACK_32_RGBA
1306 p_y += 8;
1307 p_u += 4;
1308 p_v += 4;
1309 p_buffer += 8;
1311 SCALE_WIDTH;
1312 SCALE_HEIGHT( 420, 4 );
1314 p_y += i_source_margin;
1315 if( i_y % 2 )
1317 p_u += i_source_margin_c;
1318 p_v += i_source_margin_c;
1322 /* re-enable FPU registers */
1323 MMX_END;
1325 #endif
1328 VLC_TARGET
1329 void I420_B8G8R8A8( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
1331 /* We got this one from the old arguments */
1332 uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
1333 uint8_t *p_y = p_src->Y_PIXELS;
1334 uint8_t *p_u = p_src->U_PIXELS;
1335 uint8_t *p_v = p_src->V_PIXELS;
1337 bool b_hscale; /* horizontal scaling type */
1338 unsigned int i_vscale; /* vertical scaling type */
1339 unsigned int i_x, i_y; /* horizontal and vertical indexes */
1341 int i_right_margin;
1342 int i_rewind;
1343 int i_scale_count; /* scale modulo counter */
1344 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
1345 uint32_t * p_pic_start; /* beginning of the current line for copy */
1346 /* Conversion buffer pointer */
1347 uint32_t * p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
1348 uint32_t * p_buffer;
1350 /* Offset array pointer */
1351 int * p_offset_start = p_filter->p_sys->p_offset;
1352 int * p_offset;
1354 const int i_source_margin = p_src->p[0].i_pitch
1355 - p_src->p[0].i_visible_pitch;
1356 const int i_source_margin_c = p_src->p[1].i_pitch
1357 - p_src->p[1].i_visible_pitch;
1359 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
1361 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
1362 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
1363 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
1364 SetOffset( p_filter->fmt_in.video.i_width,
1365 p_filter->fmt_in.video.i_height,
1366 p_filter->fmt_out.video.i_width,
1367 p_filter->fmt_out.video.i_height,
1368 &b_hscale, &i_vscale, p_offset_start );
1371 * Perform conversion
1373 i_scale_count = ( i_vscale == 1 ) ?
1374 p_filter->fmt_out.video.i_height :
1375 p_filter->fmt_in.video.i_height;
1377 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
1379 i_rewind = (-p_filter->fmt_in.video.i_width) & 15;
1382 ** SSE2 128 bits fetch/store instructions are faster
1383 ** if memory access is 16 bytes aligned
1386 p_buffer = b_hscale ? p_buffer_start : p_pic;
1387 if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
1388 p_dest->p->i_pitch|
1389 ((intptr_t)p_y)|
1390 ((intptr_t)p_buffer))) )
1392 /* use faster SSE2 aligned fetch and store */
1393 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1395 p_pic_start = p_pic;
1397 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
1399 SSE2_CALL (
1400 SSE2_INIT_32_ALIGNED
1401 SSE2_YUV_MUL
1402 SSE2_YUV_ADD
1403 SSE2_UNPACK_32_BGRA_ALIGNED
1405 p_y += 16;
1406 p_u += 8;
1407 p_v += 8;
1408 p_buffer += 16;
1411 /* Here we do some unaligned reads and duplicate conversions, but
1412 * at least we have all the pixels */
1413 if( i_rewind )
1415 p_y -= i_rewind;
1416 p_u -= i_rewind >> 1;
1417 p_v -= i_rewind >> 1;
1418 p_buffer -= i_rewind;
1419 SSE2_CALL (
1420 SSE2_INIT_32_UNALIGNED
1421 SSE2_YUV_MUL
1422 SSE2_YUV_ADD
1423 SSE2_UNPACK_32_BGRA_UNALIGNED
1425 p_y += 16;
1426 p_u += 4;
1427 p_v += 4;
1429 SCALE_WIDTH;
1430 SCALE_HEIGHT( 420, 4 );
1432 p_y += i_source_margin;
1433 if( i_y % 2 )
1435 p_u += i_source_margin_c;
1436 p_v += i_source_margin_c;
1438 p_buffer = b_hscale ? p_buffer_start : p_pic;
1441 else
1443 /* use slower SSE2 unaligned fetch and store */
1444 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1446 p_pic_start = p_pic;
1447 p_buffer = b_hscale ? p_buffer_start : p_pic;
1449 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
1451 SSE2_CALL (
1452 SSE2_INIT_32_UNALIGNED
1453 SSE2_YUV_MUL
1454 SSE2_YUV_ADD
1455 SSE2_UNPACK_32_BGRA_UNALIGNED
1457 p_y += 16;
1458 p_u += 8;
1459 p_v += 8;
1460 p_buffer += 16;
1463 /* Here we do some unaligned reads and duplicate conversions, but
1464 * at least we have all the pixels */
1465 if( i_rewind )
1467 p_y -= i_rewind;
1468 p_u -= i_rewind >> 1;
1469 p_v -= i_rewind >> 1;
1470 p_buffer -= i_rewind;
1471 SSE2_CALL (
1472 SSE2_INIT_32_UNALIGNED
1473 SSE2_YUV_MUL
1474 SSE2_YUV_ADD
1475 SSE2_UNPACK_32_BGRA_UNALIGNED
1477 p_y += 16;
1478 p_u += 8;
1479 p_v += 8;
1481 SCALE_WIDTH;
1482 SCALE_HEIGHT( 420, 4 );
1484 p_y += i_source_margin;
1485 if( i_y % 2 )
1487 p_u += i_source_margin_c;
1488 p_v += i_source_margin_c;
1490 p_buffer = b_hscale ? p_buffer_start : p_pic;
1494 #else
1496 i_rewind = (-p_filter->fmt_in.video.i_width) & 7;
1498 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1500 p_pic_start = p_pic;
1501 p_buffer = b_hscale ? p_buffer_start : p_pic;
1503 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
1505 MMX_CALL (
1506 MMX_INIT_32
1507 MMX_YUV_MUL
1508 MMX_YUV_ADD
1509 MMX_UNPACK_32_BGRA
1511 p_y += 8;
1512 p_u += 4;
1513 p_v += 4;
1514 p_buffer += 8;
1517 /* Here we do some unaligned reads and duplicate conversions, but
1518 * at least we have all the pixels */
1519 if( i_rewind )
1521 p_y -= i_rewind;
1522 p_u -= i_rewind >> 1;
1523 p_v -= i_rewind >> 1;
1524 p_buffer -= i_rewind;
1525 MMX_CALL (
1526 MMX_INIT_32
1527 MMX_YUV_MUL
1528 MMX_YUV_ADD
1529 MMX_UNPACK_32_BGRA
1531 p_y += 8;
1532 p_u += 4;
1533 p_v += 4;
1534 p_buffer += 8;
1536 SCALE_WIDTH;
1537 SCALE_HEIGHT( 420, 4 );
1539 p_y += i_source_margin;
1540 if( i_y % 2 )
1542 p_u += i_source_margin_c;
1543 p_v += i_source_margin_c;
1547 /* re-enable FPU registers */
1548 MMX_END;
1550 #endif
1553 VLC_TARGET
1554 void I420_A8B8G8R8( filter_t *p_filter, picture_t *p_src, picture_t *p_dest )
1556 /* We got this one from the old arguments */
1557 uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
1558 uint8_t *p_y = p_src->Y_PIXELS;
1559 uint8_t *p_u = p_src->U_PIXELS;
1560 uint8_t *p_v = p_src->V_PIXELS;
1562 bool b_hscale; /* horizontal scaling type */
1563 unsigned int i_vscale; /* vertical scaling type */
1564 unsigned int i_x, i_y; /* horizontal and vertical indexes */
1566 int i_right_margin;
1567 int i_rewind;
1568 int i_scale_count; /* scale modulo counter */
1569 int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
1570 uint32_t * p_pic_start; /* beginning of the current line for copy */
1571 /* Conversion buffer pointer */
1572 uint32_t * p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
1573 uint32_t * p_buffer;
1575 /* Offset array pointer */
1576 int * p_offset_start = p_filter->p_sys->p_offset;
1577 int * p_offset;
1579 const int i_source_margin = p_src->p[0].i_pitch
1580 - p_src->p[0].i_visible_pitch;
1581 const int i_source_margin_c = p_src->p[1].i_pitch
1582 - p_src->p[1].i_visible_pitch;
1584 i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
1586 /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
1587 * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
1588 * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
1589 SetOffset( p_filter->fmt_in.video.i_width,
1590 p_filter->fmt_in.video.i_height,
1591 p_filter->fmt_out.video.i_width,
1592 p_filter->fmt_out.video.i_height,
1593 &b_hscale, &i_vscale, p_offset_start );
1596 * Perform conversion
1598 i_scale_count = ( i_vscale == 1 ) ?
1599 p_filter->fmt_out.video.i_height :
1600 p_filter->fmt_in.video.i_height;
1602 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
1604 i_rewind = (-p_filter->fmt_in.video.i_width) & 15;
1607 ** SSE2 128 bits fetch/store instructions are faster
1608 ** if memory access is 16 bytes aligned
1611 p_buffer = b_hscale ? p_buffer_start : p_pic;
1612 if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
1613 p_dest->p->i_pitch|
1614 ((intptr_t)p_y)|
1615 ((intptr_t)p_buffer))) )
1617 /* use faster SSE2 aligned fetch and store */
1618 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1620 p_pic_start = p_pic;
1622 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
1624 SSE2_CALL (
1625 SSE2_INIT_32_ALIGNED
1626 SSE2_YUV_MUL
1627 SSE2_YUV_ADD
1628 SSE2_UNPACK_32_ABGR_ALIGNED
1630 p_y += 16;
1631 p_u += 8;
1632 p_v += 8;
1633 p_buffer += 16;
1636 /* Here we do some unaligned reads and duplicate conversions, but
1637 * at least we have all the pixels */
1638 if( i_rewind )
1640 p_y -= i_rewind;
1641 p_u -= i_rewind >> 1;
1642 p_v -= i_rewind >> 1;
1643 p_buffer -= i_rewind;
1644 SSE2_CALL (
1645 SSE2_INIT_32_UNALIGNED
1646 SSE2_YUV_MUL
1647 SSE2_YUV_ADD
1648 SSE2_UNPACK_32_ABGR_UNALIGNED
1650 p_y += 16;
1651 p_u += 4;
1652 p_v += 4;
1654 SCALE_WIDTH;
1655 SCALE_HEIGHT( 420, 4 );
1657 p_y += i_source_margin;
1658 if( i_y % 2 )
1660 p_u += i_source_margin_c;
1661 p_v += i_source_margin_c;
1663 p_buffer = b_hscale ? p_buffer_start : p_pic;
1666 else
1668 /* use slower SSE2 unaligned fetch and store */
1669 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1671 p_pic_start = p_pic;
1672 p_buffer = b_hscale ? p_buffer_start : p_pic;
1674 for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
1676 SSE2_CALL (
1677 SSE2_INIT_32_UNALIGNED
1678 SSE2_YUV_MUL
1679 SSE2_YUV_ADD
1680 SSE2_UNPACK_32_ABGR_UNALIGNED
1682 p_y += 16;
1683 p_u += 8;
1684 p_v += 8;
1685 p_buffer += 16;
1688 /* Here we do some unaligned reads and duplicate conversions, but
1689 * at least we have all the pixels */
1690 if( i_rewind )
1692 p_y -= i_rewind;
1693 p_u -= i_rewind >> 1;
1694 p_v -= i_rewind >> 1;
1695 p_buffer -= i_rewind;
1696 SSE2_CALL (
1697 SSE2_INIT_32_UNALIGNED
1698 SSE2_YUV_MUL
1699 SSE2_YUV_ADD
1700 SSE2_UNPACK_32_ABGR_UNALIGNED
1702 p_y += 16;
1703 p_u += 8;
1704 p_v += 8;
1706 SCALE_WIDTH;
1707 SCALE_HEIGHT( 420, 4 );
1709 p_y += i_source_margin;
1710 if( i_y % 2 )
1712 p_u += i_source_margin_c;
1713 p_v += i_source_margin_c;
1715 p_buffer = b_hscale ? p_buffer_start : p_pic;
1719 #else
1721 i_rewind = (-p_filter->fmt_in.video.i_width) & 7;
1723 for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1725 p_pic_start = p_pic;
1726 p_buffer = b_hscale ? p_buffer_start : p_pic;
1728 for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
1730 MMX_CALL (
1731 MMX_INIT_32
1732 MMX_YUV_MUL
1733 MMX_YUV_ADD
1734 MMX_UNPACK_32_ABGR
1736 p_y += 8;
1737 p_u += 4;
1738 p_v += 4;
1739 p_buffer += 8;
1742 /* Here we do some unaligned reads and duplicate conversions, but
1743 * at least we have all the pixels */
1744 if( i_rewind )
1746 p_y -= i_rewind;
1747 p_u -= i_rewind >> 1;
1748 p_v -= i_rewind >> 1;
1749 p_buffer -= i_rewind;
1750 MMX_CALL (
1751 MMX_INIT_32
1752 MMX_YUV_MUL
1753 MMX_YUV_ADD
1754 MMX_UNPACK_32_ABGR
1756 p_y += 8;
1757 p_u += 4;
1758 p_v += 4;
1759 p_buffer += 8;
1761 SCALE_WIDTH;
1762 SCALE_HEIGHT( 420, 4 );
1764 p_y += i_source_margin;
1765 if( i_y % 2 )
1767 p_u += i_source_margin_c;
1768 p_v += i_source_margin_c;
1772 /* re-enable FPU registers */
1773 MMX_END;
1775 #endif
1778 #endif
1780 /* Following functions are local */
/*****************************************************************************
 * SetOffset: build offset array for conversion functions
 *****************************************************************************
 * This function will build an offset array used in later conversion functions.
 * It will also set horizontal and vertical scaling indicators.
 *
 * i_width, i_height:         size of the source picture
 * i_pic_width, i_pic_height: size of the destination picture
 * pb_hscale: set to false when both widths are equal, true otherwise
 * pi_vscale: set to 0 when both heights are equal, 1 when the destination is
 *            taller, and (unsigned int)-1 when it is shorter
 * p_offset:  filled only when the widths differ, with exactly i_pic_width
 *            entries whose sum is i_width. Entries are 0 or 1 when the
 *            picture is widened and >= 1 when it is narrowed.
 *            NOTE(review): presumably each entry is the number of source
 *            pixels to step over after emitting one destination pixel;
 *            confirm against the SCALE_WIDTH macro.
 *
 * Non-positive widths leave the offset array untouched.
 *****************************************************************************/
static void SetOffset( int i_width, int i_height, int i_pic_width,
                       int i_pic_height, bool *pb_hscale,
                       unsigned int *pi_vscale, int *p_offset )
{
    /*
     * Prepare horizontal offset array
     */
    if( i_pic_width == i_width )
    {
        /* No horizontal scaling: YUV conversion is done directly to picture */
        *pb_hscale = false;
    }
    else if( i_pic_width > i_width )
    {
        /* Prepare scaling array for horizontal extension: every source pixel
         * yields a run of zeroes (repeat the pixel) closed by a single 1
         * (move on to the next pixel). */
        int i_scale_count = i_pic_width;                 /* modulo counter */

        *pb_hscale = true;
        for( int i_x = 0; i_x < i_width; i_x++ )
        {
            while( (i_scale_count -= i_width) > 0 )
            {
                *p_offset++ = 0;
            }
            *p_offset++ = 1;
            i_scale_count += i_pic_width;
        }
    }
    else /* i_pic_width < i_width */
    {
        /* Prepare scaling array for horizontal reduction: every destination
         * pixel gets the count of source pixels it stands for. */
        int i_scale_count = i_width;                     /* modulo counter */

        *pb_hscale = true;
        for( int i_x = 0; i_x < i_pic_width; i_x++ )
        {
            *p_offset = 1;
            while( (i_scale_count -= i_pic_width) > 0 )
            {
                *p_offset += 1;
            }
            p_offset++;
            i_scale_count += i_width;
        }
    }

    /*
     * Set vertical scaling indicator
     */
    if( i_pic_height == i_height )
    {
        *pi_vscale = 0;
    }
    else if( i_pic_height > i_height )
    {
        *pi_vscale = 1;
    }
    else /* i_pic_height < i_height */
    {
        /* The indicator is unsigned: "-1" is stored as UINT_MAX on purpose */
        *pi_vscale = (unsigned int)-1;
    }
}