modules/video_chroma/i420_rgb16.c

   1 /*****************************************************************************
   2  * i420_rgb16.c : YUV to bitmap RGB conversion module for vlc
   3  *****************************************************************************
   4  * Copyright (C) 2000 the VideoLAN team
   5  * $Id$
   6  *
   7  * Authors: Samuel Hocevar <sam@zoy.org>
   8  *          Damien Fouilleul <damienf@videolan.org>
   9  *
  10  * This program is free software; you can redistribute it and/or modify
  11  * it under the terms of the GNU General Public License as published by
  12  * the Free Software Foundation; either version 2 of the License, or
  13  * (at your option) any later version.
  14  *
  15  * This program is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18  * GNU General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU General Public License
  21  * along with this program; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  23  *****************************************************************************/
  24
  25 /*****************************************************************************
  26  * Preamble
  27  *****************************************************************************/
  28
  29 #ifdef HAVE_CONFIG_H
  30 # include "config.h"
  31 #endif
  32
  33 #include <vlc_common.h>
  34 #include <vlc_filter.h>
  35
  36 #include "i420_rgb.h"
  37 #if defined (MODULE_NAME_IS_i420_rgb)
  38 #   include "i420_rgb_c.h"
  39 #elif defined (MODULE_NAME_IS_i420_rgb_mmx)
  40 #   include "../mmx/i420_rgb_mmx.h"
  41 #elif defined (MODULE_NAME_IS_i420_rgb_sse2)
  42 #   include "../mmx/i420_rgb_mmx.h"
  43 #endif
  44
  45 static void SetOffset( int, int, int, int, bool *,
  46                        unsigned int *, int * );
     /* SetOffset is only forward-declared above; its definition is not part of
      * this section. Every call site below passes, in order: input width, input
      * height, output width, output height, &b_hscale, &i_vscale and
      * p_offset_start, and then reads b_hscale and i_vscale back. */
  47
  48 #if defined (MODULE_NAME_IS_i420_rgb)
  49 /*****************************************************************************
  50  * I420_RGB16_dither: color YUV 4:2:0 to RGB 16 bpp with dithering
  51  *****************************************************************************
  52  * Horizontal alignment needed:
  53  *  - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
  54  *  - output: 1 pixel (2 bytes), margins allowed
  55  * Vertical alignment needed:
  56  *  - input: 2 lines (2 Y lines, 1 U/V line)
  57  *  - output: 1 line
  58  *****************************************************************************/
  59 void I420_RGB16_dither( filter_t *p_filter, picture_t *p_src,
  60                                                 picture_t *p_dest )
  61 {
         /* Converts the planar Y/U/V picture p_src into the 16-bit-per-pixel
          * picture p_dest. The outer loop walks the fmt_in.video.i_height source
          * rows; the inner loop handles one group of 8 macro invocations per
          * iteration (presumably one pixel each), cycling through the four
          * dither matrices declared below.
          *
          * p_filter: supplies fmt_in/fmt_out dimensions, fmt_out.video.i_rrshift
          *           and the p_sys data (p_rgb16, p_buffer, p_offset).
          * p_src:    source picture, read through Y_PIXELS, U_PIXELS, V_PIXELS.
          * p_dest:   destination picture, written through p_dest->p->p_pixels.
          *
          * NOTE(review): CONVERT_YUV_PIXEL_DITHER, CONVERT_Y_PIXEL_DITHER,
          * SCALE_WIDTH and SCALE_HEIGHT are macros defined outside this file.
          * Several locals (i_uval, i_vval, i_red, i_green, i_blue, p_yuv, p_ybase,
          * p_offset, i_real_y, i_chroma_width, i_scale_count, p_dither, ...) are
          * never used directly in this body, so they are presumably referenced by
          * name inside those macros. Check the macro definitions before renaming,
          * removing or reordering anything here. */
  62     /* We got this one from the old arguments */
  63     uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
  64     uint8_t  *p_y   = p_src->Y_PIXELS;
  65     uint8_t  *p_u   = p_src->U_PIXELS;
  66     uint8_t  *p_v   = p_src->V_PIXELS;
  67
  68     bool   b_hscale;                        /* horizontal scaling type */
  69     unsigned int i_vscale;                          /* vertical scaling type */
  70     unsigned int i_x, i_y;                /* horizontal and vertical indexes */
  71     unsigned int i_real_y;                                          /* y % 4 */
  72
  73     int         i_right_margin;
  74     int         i_rewind;
  75     int         i_scale_count;                       /* scale modulo counter */
  76     int         i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
  77     uint16_t *  p_pic_start;       /* beginning of the current line for copy */
  78     int         i_uval, i_vval;                           /* U and V samples */
  79     int         i_red, i_green, i_blue;          /* U and V modified samples */
  80     uint16_t *  p_yuv = p_filter->p_sys->p_rgb16;
  81     uint16_t *  p_ybase;                     /* Y dependent conversion table */
  82
  83     /* Conversion buffer pointer */
  84     uint16_t *  p_buffer_start = (uint16_t*)p_filter->p_sys->p_buffer;
  85     uint16_t *  p_buffer;
  86
  87     /* Offset array pointer */
  88     int *       p_offset_start = p_filter->p_sys->p_offset;
  89     int *       p_offset;
  90
         /* Per-row padding of the source planes: pitch minus visible pitch, for
          * plane 0 and for plane 1 (the same value is applied to both p_u and
          * p_v below). */
  91     const int i_source_margin = p_src->p[0].i_pitch
  92                                  - p_src->p[0].i_visible_pitch;
  93     const int i_source_margin_c = p_src->p[1].i_pitch
  94                                  - p_src->p[1].i_visible_pitch;
  95
  96     /* The dithering matrices */
  97     int dither10[4] = {  0x0,  0x8,  0x2,  0xa };
  98     int dither11[4] = {  0xc,  0x4,  0xe,  0x6 };
  99     int dither12[4] = {  0x3,  0xb,  0x1,  0x9 };
 100     int dither13[4] = {  0xf,  0x7,  0xd,  0x5 };
 101
         /* Pre-shift every matrix entry left by (SHIFT - 4 + i_rrshift) bits.
          * SHIFT is not defined in this file (presumably in an included header).
          * i_x is merely reused as the index here. */
 102     for(i_x = 0; i_x < 4; i_x++)
 103     {
 104         dither10[i_x] = dither10[i_x] << (SHIFT - 4 + p_filter->fmt_out.video.i_rrshift);
 105         dither11[i_x] = dither11[i_x] << (SHIFT - 4 + p_filter->fmt_out.video.i_rrshift);
 106         dither12[i_x] = dither12[i_x] << (SHIFT - 4 + p_filter->fmt_out.video.i_rrshift);
 107         dither13[i_x] = dither13[i_x] << (SHIFT - 4 + p_filter->fmt_out.video.i_rrshift);
 108     }
 109
         /* Destination row padding (pitch minus visible pitch); not used directly
          * below, presumably consumed by SCALE_WIDTH. */
 110     i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
 111
         /* i_rewind = number of pixels by which the input width falls short of
          * the next multiple of 8 (0 when the width is already a multiple). */
 112     if( p_filter->fmt_in.video.i_width & 7 )
 113     {
 114         i_rewind = 8 - ( p_filter->fmt_in.video.i_width & 7 );
 115     }
 116     else
 117     {
 118         i_rewind = 0;
 119     }
 120
 121     /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
 122      * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
 123      * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
 124     SetOffset( p_filter->fmt_in.video.i_width,
 125                p_filter->fmt_in.video.i_height,
 126                p_filter->fmt_out.video.i_width,
 127                p_filter->fmt_out.video.i_height,
 128                &b_hscale, &i_vscale, p_offset_start );
 129
 130     /*
 131      * Perform conversion
 132      */
         /* The counter starts at the output height when i_vscale == 1 and at the
          * input height otherwise; presumably it is updated by SCALE_HEIGHT. */
 133     i_scale_count = ( i_vscale == 1 ) ?
 134                     p_filter->fmt_out.video.i_height :
 135                     p_filter->fmt_in.video.i_height;
 136     for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
 137     {
             /* Row index modulo 4; presumably selects the entry of the current
              * dither matrix inside the DITHER macros. */
 138         i_real_y = i_y & 0x3;
 139         p_pic_start = p_pic;
             /* With horizontal scaling the row is first converted into the
              * intermediate buffer; otherwise straight into the picture. */
 140         p_buffer = b_hscale ? p_buffer_start : p_pic;
 141
             /* One iteration per full group of 8 input pixels; i_x is only a
              * countdown counter. p_dither cycles 10, 11, 12, 13 twice. */
 142         for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
 143         {
 144             int *p_dither = dither10;
 145             CONVERT_YUV_PIXEL_DITHER(2);
 146             p_dither = dither11;
 147             CONVERT_Y_PIXEL_DITHER(2);
 148             p_dither = dither12;
 149             CONVERT_YUV_PIXEL_DITHER(2);
 150             p_dither = dither13;
 151             CONVERT_Y_PIXEL_DITHER(2);
 152             p_dither = dither10;
 153             CONVERT_YUV_PIXEL_DITHER(2);
 154             p_dither = dither11;
 155             CONVERT_Y_PIXEL_DITHER(2);
 156             p_dither = dither12;
 157             CONVERT_YUV_PIXEL_DITHER(2);
 158             p_dither = dither13;
 159             CONVERT_Y_PIXEL_DITHER(2);
 160         }
 161
 162         /* Here we do some unaligned reads and duplicate conversions, but
 163          * at least we have all the pixels */
             /* The pointers are stepped back by i_rewind (half of it for the
              * chroma pointers) and one more group of 8 is converted.
              * NOTE(review): if the input width is below 8 the loop above runs
              * zero times, so this steps p_y/p_u/p_v/p_buffer back before the
              * start of the current row -- confirm callers guarantee
              * width >= 8. */
 164         if( i_rewind )
 165         {
 166             int *p_dither = dither10;
 167             p_y -= i_rewind;
 168             p_u -= i_rewind >> 1;
 169             p_v -= i_rewind >> 1;
 170             p_buffer -= i_rewind;
 171             CONVERT_YUV_PIXEL_DITHER(2);
 172             p_dither = dither11;
 173             CONVERT_Y_PIXEL_DITHER(2);
 174             p_dither = dither12;
 175             CONVERT_YUV_PIXEL_DITHER(2);
 176             p_dither = dither13;
 177             CONVERT_Y_PIXEL_DITHER(2);
 178             p_dither = dither10;
 179             CONVERT_YUV_PIXEL_DITHER(2);
 180             p_dither = dither11;
 181             CONVERT_Y_PIXEL_DITHER(2);
 182             p_dither = dither12;
 183             CONVERT_YUV_PIXEL_DITHER(2);
 184             p_dither = dither13;
 185             CONVERT_Y_PIXEL_DITHER(2);
 186         }
 187         SCALE_WIDTH;
 188         SCALE_HEIGHT( 420, 2 );
 189
             /* Skip the source row padding. The chroma pointers only skip their
              * padding after odd rows (the header comment states 2 Y lines per
              * U/V line; presumably SCALE_HEIGHT handles the even rows). */
 190         p_y += i_source_margin;
 191         if( i_y % 2 )
 192         {
 193             p_u += i_source_margin_c;
 194             p_v += i_source_margin_c;
 195         }
 196     }
 197 }
 198 #endif
 199
 200 /*****************************************************************************
 201  * I420_RGB16: color YUV 4:2:0 to RGB 16 bpp
 202  *****************************************************************************
 203  * Horizontal alignment needed:
 204  *  - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
 205  *  - output: 1 pixel (2 bytes), margins allowed
 206  * Vertical alignment needed:
 207  *  - input: 2 lines (2 Y lines, 1 U/V line)
 208  *  - output: 1 line
 209  *****************************************************************************/
 210
 211 #if defined (MODULE_NAME_IS_i420_rgb)
 212
 213 void I420_RGB16( filter_t *p_filter, picture_t *p_src,
 214                                          picture_t *p_dest )
 215 {
         /* Plain C conversion of the planar Y/U/V picture p_src into the
          * 16-bit-per-pixel picture p_dest, without dithering. The outer loop
          * walks the fmt_in.video.i_height source rows; each inner iteration
          * issues 8 macro invocations (presumably one pixel each).
          *
          * p_filter: supplies fmt_in/fmt_out dimensions and the p_sys data
          *           (p_rgb16, p_buffer, p_offset).
          * p_src:    source picture, read through Y_PIXELS, U_PIXELS, V_PIXELS.
          * p_dest:   destination picture, written through p_dest->p->p_pixels.
          *
          * NOTE(review): CONVERT_YUV_PIXEL, CONVERT_Y_PIXEL, SCALE_WIDTH and
          * SCALE_HEIGHT are macros defined outside this file. Locals that are
          * never used directly here (i_uval, i_vval, i_red, i_green, i_blue,
          * p_yuv, p_ybase, p_offset, i_chroma_width, i_scale_count, ...) are
          * presumably referenced by name inside them; check the macro definitions
          * before renaming or removing any local. */
 216     /* We got this one from the old arguments */
 217     uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
 218     uint8_t  *p_y   = p_src->Y_PIXELS;
 219     uint8_t  *p_u   = p_src->U_PIXELS;
 220     uint8_t  *p_v   = p_src->V_PIXELS;
 221
 222     bool  b_hscale;                         /* horizontal scaling type */
 223     unsigned int i_vscale;                          /* vertical scaling type */
 224     unsigned int i_x, i_y;                /* horizontal and vertical indexes */
 225
 226     int         i_right_margin;
 227     int         i_rewind;
 228     int         i_scale_count;                       /* scale modulo counter */
 229     int         i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
 230     uint16_t *  p_pic_start;       /* beginning of the current line for copy */
 231     int         i_uval, i_vval;                           /* U and V samples */
 232     int         i_red, i_green, i_blue;          /* U and V modified samples */
 233     uint16_t *  p_yuv = p_filter->p_sys->p_rgb16;
 234     uint16_t *  p_ybase;                     /* Y dependent conversion table */
 235
 236     /* Conversion buffer pointer */
 237     uint16_t *  p_buffer_start = (uint16_t*)p_filter->p_sys->p_buffer;
 238     uint16_t *  p_buffer;
 239
 240     /* Offset array pointer */
 241     int *       p_offset_start = p_filter->p_sys->p_offset;
 242     int *       p_offset;
 243
         /* Per-row padding of the source planes: pitch minus visible pitch, for
          * plane 0 and for plane 1 (the same value is applied to both p_u and
          * p_v below). */
 244     const int i_source_margin = p_src->p[0].i_pitch
 245                                  - p_src->p[0].i_visible_pitch;
 246     const int i_source_margin_c = p_src->p[1].i_pitch
 247                                  - p_src->p[1].i_visible_pitch;
 248
         /* Destination row padding (pitch minus visible pitch); not used directly
          * below, presumably consumed by SCALE_WIDTH. */
 249     i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
 250
         /* i_rewind = number of pixels by which the input width falls short of
          * the next multiple of 8 (0 when the width is already a multiple). */
 251     if( p_filter->fmt_in.video.i_width & 7 )
 252     {
 253         i_rewind = 8 - ( p_filter->fmt_in.video.i_width & 7 );
 254     }
 255     else
 256     {
 257         i_rewind = 0;
 258     }
 259
 260     /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
 261      * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
 262      * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
 263     SetOffset( p_filter->fmt_in.video.i_width,
 264                p_filter->fmt_in.video.i_height,
 265                p_filter->fmt_out.video.i_width,
 266                p_filter->fmt_out.video.i_height,
 267                &b_hscale, &i_vscale, p_offset_start );
 268
 269     /*
 270      * Perform conversion
 271      */
         /* The counter starts at the output height when i_vscale == 1 and at the
          * input height otherwise; presumably it is updated by SCALE_HEIGHT. */
 272     i_scale_count = ( i_vscale == 1 ) ?
 273                     p_filter->fmt_out.video.i_height :
 274                     p_filter->fmt_in.video.i_height;
 275     for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
 276     {
 277         p_pic_start = p_pic;
             /* With horizontal scaling the row is first converted into the
              * intermediate buffer; otherwise straight into the picture. */
 278         p_buffer = b_hscale ? p_buffer_start : p_pic;
 279
             /* One iteration per full group of 8 input pixels; i_x is only a
              * countdown counter. */
 280         for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
 281         {
 282             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
 283             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
 284             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
 285             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
 286         }
 287
 288         /* Here we do some unaligned reads and duplicate conversions, but
 289          * at least we have all the pixels */
             /* The pointers are stepped back by i_rewind (half of it for the
              * chroma pointers) and one more group of 8 is converted.
              * NOTE(review): if the input width is below 8 the loop above runs
              * zero times, so this steps p_y/p_u/p_v/p_buffer back before the
              * start of the current row -- confirm callers guarantee
              * width >= 8. */
 290         if( i_rewind )
 291         {
 292             p_y -= i_rewind;
 293             p_u -= i_rewind >> 1;
 294             p_v -= i_rewind >> 1;
 295             p_buffer -= i_rewind;
 296
 297             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
 298             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
 299             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
 300             CONVERT_YUV_PIXEL(2);  CONVERT_Y_PIXEL(2);
 301         }
 302         SCALE_WIDTH;
 303         SCALE_HEIGHT( 420, 2 );
 304
             /* Skip the source row padding. The chroma pointers only skip their
              * padding after odd rows (the header comment states 2 Y lines per
              * U/V line; presumably SCALE_HEIGHT handles the even rows). */
 305         p_y += i_source_margin;
 306         if( i_y % 2 )
 307         {
 308             p_u += i_source_margin_c;
 309             p_v += i_source_margin_c;
 310         }
 311     }
 312 }
 313
 314 #else // ! defined (MODULE_NAME_IS_i420_rgb)
 315
 316 void I420_R5G5B5( filter_t *p_filter, picture_t *p_src,
 317                                           picture_t *p_dest )
 318 {
         /* SIMD conversion of the planar Y/U/V picture p_src into the
          * 16-bit-per-pixel picture p_dest. Compiled only when
          * MODULE_NAME_IS_i420_rgb is not defined (enclosing #else); the body is
          * the SSE2 variant (16 pixels per step) when MODULE_NAME_IS_i420_rgb_sse2
          * is defined and the MMX variant (8 pixels per step) otherwise.
          * The pixel packing is done by the xxx_UNPACK_15 macros (presumably
          * 5/5/5-bit RGB, per the function name). I420_R5G6B5 in this file has
          * the same structure and differs only in using the xxx_UNPACK_16 macros.
          *
          * p_filter: supplies fmt_in/fmt_out dimensions and the p_sys data
          *           (p_buffer, p_offset).
          * p_src:    source picture, read through Y_PIXELS, U_PIXELS, V_PIXELS.
          * p_dest:   destination picture, written through p_dest->p->p_pixels.
          *
          * NOTE(review): SSE2_CALL, MMX_CALL, the SSE2_xxx / MMX_xxx fragments,
          * SCALE_WIDTH and SCALE_HEIGHT are macros defined outside this file and
          * presumably reference locals (p_y, p_u, p_v, p_buffer, p_offset,
          * i_chroma_width, i_scale_count, ...) by name; check their definitions
          * before renaming or removing any local. */
 319     /* We got this one from the old arguments */
 320     uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
 321     uint8_t  *p_y   = p_src->Y_PIXELS;
 322     uint8_t  *p_u   = p_src->U_PIXELS;
 323     uint8_t  *p_v   = p_src->V_PIXELS;
 324
 325     bool  b_hscale;                         /* horizontal scaling type */
 326     unsigned int i_vscale;                          /* vertical scaling type */
 327     unsigned int i_x, i_y;                /* horizontal and vertical indexes */
 328
 329     int         i_right_margin;
 330     int         i_rewind;
 331     int         i_scale_count;                       /* scale modulo counter */
 332     int         i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
 333     uint16_t *  p_pic_start;       /* beginning of the current line for copy */
 334
 335     /* Conversion buffer pointer */
 336     uint16_t *  p_buffer_start = (uint16_t*)p_filter->p_sys->p_buffer;
 337     uint16_t *  p_buffer;
 338
 339     /* Offset array pointer */
 340     int *       p_offset_start = p_filter->p_sys->p_offset;
 341     int *       p_offset;
 342
         /* Per-row padding of the source planes: pitch minus visible pitch, for
          * plane 0 and for plane 1 (the same value is applied to both p_u and
          * p_v below). */
 343     const int i_source_margin = p_src->p[0].i_pitch
 344                                  - p_src->p[0].i_visible_pitch;
 345     const int i_source_margin_c = p_src->p[1].i_pitch
 346                                  - p_src->p[1].i_visible_pitch;
 347
         /* Destination row padding (pitch minus visible pitch); not used directly
          * below, presumably consumed by SCALE_WIDTH. */
 348     i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
 349
 350     /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
 351      * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
 352      * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
 353     SetOffset( p_filter->fmt_in.video.i_width,
 354                p_filter->fmt_in.video.i_height,
 355                p_filter->fmt_out.video.i_width,
 356                p_filter->fmt_out.video.i_height,
 357                &b_hscale, &i_vscale, p_offset_start );
 358
 359
 360     /*
 361      * Perform conversion
 362      */
         /* The counter starts at the output height when i_vscale == 1 and at the
          * input height otherwise; presumably it is updated by SCALE_HEIGHT. */
 363     i_scale_count = ( i_vscale == 1 ) ?
 364                     p_filter->fmt_out.video.i_height :
 365                     p_filter->fmt_in.video.i_height;
 366
 367 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
 368
         /* i_rewind = number of pixels by which the input width falls short of
          * the next multiple of 16 (0 when the width is already a multiple). */
 369     if( p_filter->fmt_in.video.i_width & 15 )
 370     {
 371         i_rewind = 16 - ( p_filter->fmt_in.video.i_width & 15 );
 372     }
 373     else
 374     {
 375         i_rewind = 0;
 376     }
 377
 378     /*
 379     ** SSE2 128 bits fetch/store instructions are faster
 380     ** if memory access is 16 bytes aligned
 381     */
 382
         /* The two pitches and the two start pointers are OR-ed together: the low
          * four bits of the result are zero only if every one of them is a
          * multiple of 16. p_buffer must therefore be set before the test. */
 383     p_buffer = b_hscale ? p_buffer_start : p_pic;
 384     if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
 385                     p_dest->p->i_pitch|
 386                     ((intptr_t)p_y)|
 387                     ((intptr_t)p_buffer))) )
 388     {
 389         /* use faster SSE2 aligned fetch and store */
 390         for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
 391         {
 392             p_pic_start = p_pic;
 393
                 /* One iteration per full group of 16 input pixels. */
 394             for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
 395             {
 396                 SSE2_CALL (
 397                     SSE2_INIT_16_ALIGNED
 398                     SSE2_YUV_MUL
 399                     SSE2_YUV_ADD
 400                     SSE2_UNPACK_15_ALIGNED
 401                 );
 402                 p_y += 16;
 403                 p_u += 8;
 404                 p_v += 8;
 405                 p_buffer += 16;
 406             }
 407             /* Here we do some unaligned reads and duplicate conversions, but
 408              * at least we have all the pixels */
                 /* The tail uses the UNALIGNED macro variants: after stepping
                  * back by i_rewind (1..15) the pointers are in general no
                  * longer multiples of 16. p_buffer is not advanced afterwards;
                  * it is reassigned at the bottom of the row loop.
                  * NOTE(review): if the input width is below 16 the loop above
                  * runs zero times and the pointers step back before the start
                  * of the row -- confirm callers guarantee width >= 16. */
 409             if( i_rewind )
 410             {
 411                 p_y -= i_rewind;
 412                 p_u -= i_rewind >> 1;
 413                 p_v -= i_rewind >> 1;
 414                 p_buffer -= i_rewind;
 415
 416                 SSE2_CALL (
 417                     SSE2_INIT_16_UNALIGNED
 418                     SSE2_YUV_MUL
 419                     SSE2_YUV_ADD
 420                     SSE2_UNPACK_15_UNALIGNED
 421                 );
 422                 p_y += 16;
 423                 p_u += 8;
 424                 p_v += 8;
 425             }
 426             SCALE_WIDTH;
 427             SCALE_HEIGHT( 420, 2 );
 428
                 /* Skip the source row padding; chroma only after odd rows. */
 429             p_y += i_source_margin;
 430             if( i_y % 2 )
 431             {
 432                 p_u += i_source_margin_c;
 433                 p_v += i_source_margin_c;
 434             }
                 /* Re-point p_buffer for the next row (p_pic has presumably been
                  * advanced by the SCALE_xxx macros). */
 435             p_buffer = b_hscale ? p_buffer_start : p_pic;
 436         }
 437     }
 438     else
 439     {
 440         /* use slower SSE2 unaligned fetch and store */
 441         for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
 442         {
 443             p_pic_start = p_pic;
 444             p_buffer = b_hscale ? p_buffer_start : p_pic;
 445
                 /* One iteration per full group of 16 input pixels. */
 446             for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
 447             {
 448                 SSE2_CALL (
 449                     SSE2_INIT_16_UNALIGNED
 450                     SSE2_YUV_MUL
 451                     SSE2_YUV_ADD
 452                     SSE2_UNPACK_15_UNALIGNED
 453                 );
 454                 p_y += 16;
 455                 p_u += 8;
 456                 p_v += 8;
 457                 p_buffer += 16;
 458             }
 459             /* Here we do some unaligned reads and duplicate conversions, but
 460              * at least we have all the pixels */
                 /* Same tail handling as in the aligned branch above, including
                  * the width < 16 caveat. */
 461             if( i_rewind )
 462             {
 463                 p_y -= i_rewind;
 464                 p_u -= i_rewind >> 1;
 465                 p_v -= i_rewind >> 1;
 466                 p_buffer -= i_rewind;
 467
 468                 SSE2_CALL (
 469                     SSE2_INIT_16_UNALIGNED
 470                     SSE2_YUV_MUL
 471                     SSE2_YUV_ADD
 472                     SSE2_UNPACK_15_UNALIGNED
 473                 );
 474                 p_y += 16;
 475                 p_u += 8;
 476                 p_v += 8;
 477             }
 478             SCALE_WIDTH;
 479             SCALE_HEIGHT( 420, 2 );
 480
                 /* Skip the source row padding; chroma only after odd rows. */
 481             p_y += i_source_margin;
 482             if( i_y % 2 )
 483             {
 484                 p_u += i_source_margin_c;
 485                 p_v += i_source_margin_c;
 486             }
                 /* Redundant with the assignment at the top of this loop, but
                  * kept as in the aligned branch. */
 487             p_buffer = b_hscale ? p_buffer_start : p_pic;
 488         }
 489     }
 490
 491     /* make sure all SSE2 stores are visible thereafter */
 492     SSE2_END;
 493
 494 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
 495
         /* i_rewind = number of pixels by which the input width falls short of
          * the next multiple of 8 (0 when the width is already a multiple). */
 496     if( p_filter->fmt_in.video.i_width & 7 )
 497     {
 498         i_rewind = 8 - ( p_filter->fmt_in.video.i_width & 7 );
 499     }
 500     else
 501     {
 502         i_rewind = 0;
 503     }
 504
 505     for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
 506     {
 507         p_pic_start = p_pic;
             /* With horizontal scaling the row is first converted into the
              * intermediate buffer; otherwise straight into the picture. */
 508         p_buffer = b_hscale ? p_buffer_start : p_pic;
 509
             /* One iteration per full group of 8 input pixels. */
 510         for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
 511         {
 512             MMX_CALL (
 513                 MMX_INIT_16
 514                 MMX_YUV_MUL
 515                 MMX_YUV_ADD
 516                 MMX_UNPACK_15
 517             );
 518             p_y += 8;
 519             p_u += 4;
 520             p_v += 4;
 521             p_buffer += 8;
 522         }
 523
 524         /* Here we do some unaligned reads and duplicate conversions, but
 525          * at least we have all the pixels */
             /* NOTE(review): if the input width is below 8 the loop above runs
              * zero times and the pointers step back before the start of the
              * row -- confirm callers guarantee width >= 8. */
 526         if( i_rewind )
 527         {
 528             p_y -= i_rewind;
 529             p_u -= i_rewind >> 1;
 530             p_v -= i_rewind >> 1;
 531             p_buffer -= i_rewind;
 532
 533             MMX_CALL (
 534                 MMX_INIT_16
 535                 MMX_YUV_MUL
 536                 MMX_YUV_ADD
 537                 MMX_UNPACK_15
 538             );
 539             p_y += 8;
 540             p_u += 4;
 541             p_v += 4;
 542             p_buffer += 8;
 543         }
 544         SCALE_WIDTH;
 545         SCALE_HEIGHT( 420, 2 );
 546
             /* Skip the source row padding; chroma only after odd rows. */
 547         p_y += i_source_margin;
 548         if( i_y % 2 )
 549         {
 550             p_u += i_source_margin_c;
 551             p_v += i_source_margin_c;
 552         }
 553     }
 554     /* re-enable FPU registers */
 555     MMX_END;
 556
 557 #endif
 558 }
 559
 560 void I420_R5G6B5( filter_t *p_filter, picture_t *p_src,
 561                                           picture_t *p_dest )
 562 {
         /* SIMD conversion of the planar Y/U/V picture p_src into the
          * 16-bit-per-pixel picture p_dest. Compiled only when
          * MODULE_NAME_IS_i420_rgb is not defined; the body is the SSE2 variant
          * (16 pixels per step) when MODULE_NAME_IS_i420_rgb_sse2 is defined and
          * the MMX variant (8 pixels per step) otherwise.
          * The pixel packing is done by the xxx_UNPACK_16 macros (presumably
          * 5/6/5-bit RGB, per the function name). I420_R5G5B5 in this file has
          * the same structure and differs only in using the xxx_UNPACK_15 macros.
          *
          * p_filter: supplies fmt_in/fmt_out dimensions and the p_sys data
          *           (p_buffer, p_offset).
          * p_src:    source picture, read through Y_PIXELS, U_PIXELS, V_PIXELS.
          * p_dest:   destination picture, written through p_dest->p->p_pixels.
          *
          * NOTE(review): SSE2_CALL, MMX_CALL, the SSE2_xxx / MMX_xxx fragments,
          * SCALE_WIDTH and SCALE_HEIGHT are macros defined outside this file and
          * presumably reference locals (p_y, p_u, p_v, p_buffer, p_offset,
          * i_chroma_width, i_scale_count, ...) by name; check their definitions
          * before renaming or removing any local. */
 563     /* We got this one from the old arguments */
 564     uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels;
 565     uint8_t  *p_y   = p_src->Y_PIXELS;
 566     uint8_t  *p_u   = p_src->U_PIXELS;
 567     uint8_t  *p_v   = p_src->V_PIXELS;
 568
 569     bool  b_hscale;                         /* horizontal scaling type */
 570     unsigned int i_vscale;                          /* vertical scaling type */
 571     unsigned int i_x, i_y;                /* horizontal and vertical indexes */
 572
 573     int         i_right_margin;
 574     int         i_rewind;
 575     int         i_scale_count;                       /* scale modulo counter */
 576     int         i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
 577     uint16_t *  p_pic_start;       /* beginning of the current line for copy */
 578
 579     /* Conversion buffer pointer */
 580     uint16_t *  p_buffer_start = (uint16_t*)p_filter->p_sys->p_buffer;
 581     uint16_t *  p_buffer;
 582
 583     /* Offset array pointer */
 584     int *       p_offset_start = p_filter->p_sys->p_offset;
 585     int *       p_offset;
 586
         /* Per-row padding of the source planes: pitch minus visible pitch, for
          * plane 0 and for plane 1 (the same value is applied to both p_u and
          * p_v below). */
 587     const int i_source_margin = p_src->p[0].i_pitch
 588                                  - p_src->p[0].i_visible_pitch;
 589     const int i_source_margin_c = p_src->p[1].i_pitch
 590                                  - p_src->p[1].i_visible_pitch;
 591
         /* Destination row padding (pitch minus visible pitch); not used directly
          * below, presumably consumed by SCALE_WIDTH. */
 592     i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
 593
 594     /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
 595      * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
 596      * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
 597     SetOffset( p_filter->fmt_in.video.i_width,
 598                p_filter->fmt_in.video.i_height,
 599                p_filter->fmt_out.video.i_width,
 600                p_filter->fmt_out.video.i_height,
 601                &b_hscale, &i_vscale, p_offset_start );
 602
 603
 604     /*
 605      * Perform conversion
 606      */
         /* The counter starts at the output height when i_vscale == 1 and at the
          * input height otherwise; presumably it is updated by SCALE_HEIGHT. */
 607     i_scale_count = ( i_vscale == 1 ) ?
 608                     p_filter->fmt_out.video.i_height :
 609                     p_filter->fmt_in.video.i_height;
 610
 611 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
 612
         /* i_rewind = number of pixels by which the input width falls short of
          * the next multiple of 16 (0 when the width is already a multiple). */
 613     if( p_filter->fmt_in.video.i_width & 15 )
 614     {
 615         i_rewind = 16 - ( p_filter->fmt_in.video.i_width & 15 );
 616     }
 617     else
 618     {
 619         i_rewind = 0;
 620     }
 621
 622     /*
 623     ** SSE2 128 bits fetch/store instructions are faster
 624     ** if memory access is 16 bytes aligned
 625     */
 626
         /* The two pitches and the two start pointers are OR-ed together: the low
          * four bits of the result are zero only if every one of them is a
          * multiple of 16. p_buffer must therefore be set before the test. */
 627     p_buffer = b_hscale ? p_buffer_start : p_pic;
 628     if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
 629                     p_dest->p->i_pitch|
 630                     ((intptr_t)p_y)|
 631                     ((intptr_t)p_buffer))) )
 632     {
 633         /* use faster SSE2 aligned fetch and store */
 634         for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
 635         {
 636             p_pic_start = p_pic;
 637
                 /* One iteration per full group of 16 input pixels. */
 638             for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
 639             {
 640                 SSE2_CALL (
 641                     SSE2_INIT_16_ALIGNED
 642                     SSE2_YUV_MUL
 643                     SSE2_YUV_ADD
 644                     SSE2_UNPACK_16_ALIGNED
 645                 );
 646                 p_y += 16;
 647                 p_u += 8;
 648                 p_v += 8;
 649                 p_buffer += 16;
 650             }
 651             /* Here we do some unaligned reads and duplicate conversions, but
 652              * at least we have all the pixels */
                 /* The tail uses the UNALIGNED macro variants: after stepping
                  * back by i_rewind (1..15) the pointers are in general no
                  * longer multiples of 16. p_buffer is not advanced afterwards;
                  * it is reassigned at the bottom of the row loop.
                  * NOTE(review): if the input width is below 16 the loop above
                  * runs zero times and the pointers step back before the start
                  * of the row -- confirm callers guarantee width >= 16. */
 653             if( i_rewind )
 654             {
 655                 p_y -= i_rewind;
 656                 p_u -= i_rewind >> 1;
 657                 p_v -= i_rewind >> 1;
 658                 p_buffer -= i_rewind;
 659
 660                 SSE2_CALL (
 661                     SSE2_INIT_16_UNALIGNED
 662                     SSE2_YUV_MUL
 663                     SSE2_YUV_ADD
 664                     SSE2_UNPACK_16_UNALIGNED
 665                 );
 666                 p_y += 16;
 667                 p_u += 8;
 668                 p_v += 8;
 669             }
 670             SCALE_WIDTH;
 671             SCALE_HEIGHT( 420, 2 );
 672
                 /* Skip the source row padding; chroma only after odd rows. */
 673             p_y += i_source_margin;
 674             if( i_y % 2 )
 675             {
 676                 p_u += i_source_margin_c;
 677                 p_v += i_source_margin_c;
 678             }
                 /* Re-point p_buffer for the next row (p_pic has presumably been
                  * advanced by the SCALE_xxx macros). */
 679             p_buffer = b_hscale ? p_buffer_start : p_pic;
 680         }
 681     }
 682     else
 683     {
 684         /* use slower SSE2 unaligned fetch and store */
 685         for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
 686         {
 687             p_pic_start = p_pic;
 688             p_buffer = b_hscale ? p_buffer_start : p_pic;
 689
                 /* One iteration per full group of 16 input pixels. */
 690             for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; )
 691             {
 692                 SSE2_CALL(
 693                     SSE2_INIT_16_UNALIGNED
 694                     SSE2_YUV_MUL
 695                     SSE2_YUV_ADD
 696                     SSE2_UNPACK_16_UNALIGNED
 697                 );
 698                 p_y += 16;
 699                 p_u += 8;
 700                 p_v += 8;
 701                 p_buffer += 16;
 702             }
 703             /* Here we do some unaligned reads and duplicate conversions, but
 704              * at least we have all the pixels */
                 /* Same tail handling as in the aligned branch above, including
                  * the width < 16 caveat. */
 705             if( i_rewind )
 706             {
 707                 p_y -= i_rewind;
 708                 p_u -= i_rewind >> 1;
 709                 p_v -= i_rewind >> 1;
 710                 p_buffer -= i_rewind;
 711
 712                 SSE2_CALL(
 713                     SSE2_INIT_16_UNALIGNED
 714                     SSE2_YUV_MUL
 715                     SSE2_YUV_ADD
 716                     SSE2_UNPACK_16_UNALIGNED
 717                 );
 718                 p_y += 16;
 719                 p_u += 8;
 720                 p_v += 8;
 721             }
 722             SCALE_WIDTH;
 723             SCALE_HEIGHT( 420, 2 );
 724
                 /* Skip the source row padding; chroma only after odd rows. */
 725             p_y += i_source_margin;
 726             if( i_y % 2 )
 727             {
 728                 p_u += i_source_margin_c;
 729                 p_v += i_source_margin_c;
 730             }
                 /* Redundant with the assignment at the top of this loop, but
                  * kept as in the aligned branch. */
 731             p_buffer = b_hscale ? p_buffer_start : p_pic;
 732         }
 733     }
 734
 735     /* make sure all SSE2 stores are visible thereafter */
 736     SSE2_END;
 737
 738 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
 739
         /* i_rewind = number of pixels by which the input width falls short of
          * the next multiple of 8 (0 when the width is already a multiple). */
 740     if( p_filter->fmt_in.video.i_width & 7 )
 741     {
 742         i_rewind = 8 - ( p_filter->fmt_in.video.i_width & 7 );
 743     }
 744     else
 745     {
 746         i_rewind = 0;
 747     }
 748
 749     for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
 750     {
 751         p_pic_start = p_pic;
             /* With horizontal scaling the row is first converted into the
              * intermediate buffer; otherwise straight into the picture. */
 752         p_buffer = b_hscale ? p_buffer_start : p_pic;
 753
             /* One iteration per full group of 8 input pixels. */
 754         for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
 755         {
 756             MMX_CALL (
 757                 MMX_INIT_16
 758                 MMX_YUV_MUL
 759                 MMX_YUV_ADD
 760                 MMX_UNPACK_16
 761             );
 762             p_y += 8;
 763             p_u += 4;
 764             p_v += 4;
 765             p_buffer += 8;
 766         }
 767
 768         /* Here we do some unaligned reads and duplicate conversions, but
 769          * at least we have all the pixels */
             /* NOTE(review): if the input width is below 8 the loop above runs
              * zero times and the pointers step back before the start of the
              * row -- confirm callers guarantee width >= 8. */
 770         if( i_rewind )
 771         {
 772             p_y -= i_rewind;
 773             p_u -= i_rewind >> 1;
 774             p_v -= i_rewind >> 1;
 775             p_buffer -= i_rewind;
 776
 777             MMX_CALL (
 778                 MMX_INIT_16
 779                 MMX_YUV_MUL
 780                 MMX_YUV_ADD
 781                 MMX_UNPACK_16
 782             );
 783             p_y += 8;
 784             p_u += 4;
 785             p_v += 4;
 786             p_buffer += 8;
 787         }
 788         SCALE_WIDTH;
 789         SCALE_HEIGHT( 420, 2 );
 790
             /* Skip the source row padding; chroma only after odd rows. */
 791         p_y += i_source_margin;
 792         if( i_y % 2 )
 793         {
 794             p_u += i_source_margin_c;
 795             p_v += i_source_margin_c;
 796         }
 797     }
 798     /* re-enable FPU registers */
 799     MMX_END;
 800
 801 #endif
 802 }
 803
 804 #endif
 805
 806 /*****************************************************************************
 807  * I420_RGB32: color YUV 4:2:0 to RGB 32 bpp
 808  *****************************************************************************
 809  * Horizontal alignment needed:
 810  *  - input: 8 pixels (8 Y bytes, 4 U/V bytes), margins not allowed
 811  *  - output: 1 pixel (4 bytes), margins allowed
 812  * Vertical alignment needed:
 813  *  - input: 2 lines (2 Y lines, 1 U/V line)
 814  *  - output: 1 line
 815  *****************************************************************************/
 816
 817 #if defined (MODULE_NAME_IS_i420_rgb)
 818
 819 void I420_RGB32( filter_t *p_filter, picture_t *p_src,
 820                                          picture_t *p_dest )
 821 {
 822     /* We got this one from the old arguments */
 823     uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
 824     uint8_t  *p_y   = p_src->Y_PIXELS;
 825     uint8_t  *p_u   = p_src->U_PIXELS;
 826     uint8_t  *p_v   = p_src->V_PIXELS;
 827
 828     bool  b_hscale;                         /* horizontal scaling type */
 829     unsigned int i_vscale;                          /* vertical scaling type */
 830     unsigned int i_x, i_y;                /* horizontal and vertical indexes */
 831
 832     int         i_right_margin;
 833     int         i_rewind;
 834     int         i_scale_count;                       /* scale modulo counter */
 835     int         i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
 836     uint32_t *  p_pic_start;       /* beginning of the current line for copy */
 837     int         i_uval, i_vval;                           /* U and V samples */
 838     int         i_red, i_green, i_blue;          /* U and V modified samples */
 839     uint32_t *  p_yuv = p_filter->p_sys->p_rgb32;
 840     uint32_t *  p_ybase;                     /* Y dependant conversion table */
 841
 842     /* Conversion buffer pointer */
 843     uint32_t *  p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
 844     uint32_t *  p_buffer;
 845
 846     /* Offset array pointer */
 847     int *       p_offset_start = p_filter->p_sys->p_offset;
 848     int *       p_offset;
 849
 850     const int i_source_margin = p_src->p[0].i_pitch
 851                                  - p_src->p[0].i_visible_pitch;
 852     const int i_source_margin_c = p_src->p[1].i_pitch
 853                                  - p_src->p[1].i_visible_pitch;
 854
 855     i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
 856
 857     if( p_filter->fmt_in.video.i_width & 7 )
 858     {
 859         i_rewind = 8 - ( p_filter->fmt_in.video.i_width & 7 );
 860     }
 861     else
 862     {
 863         i_rewind = 0;
 864     }
 865
 866     /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
 867      * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
 868      * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
 869     SetOffset( p_filter->fmt_in.video.i_width,
 870                p_filter->fmt_in.video.i_height,
 871                p_filter->fmt_out.video.i_width,
 872                p_filter->fmt_out.video.i_height,
 873                &b_hscale, &i_vscale, p_offset_start );
 874
 875     /*
 876      * Perform conversion
 877      */
 878     i_scale_count = ( i_vscale == 1 ) ?
 879                     p_filter->fmt_out.video.i_height :
 880                     p_filter->fmt_in.video.i_height;
 881     for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
 882     {
 883         p_pic_start = p_pic;
 884         p_buffer = b_hscale ? p_buffer_start : p_pic;
 885
 886         for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
 887         {
 888             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
 889             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
 890             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
 891             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
 892         }
 893
 894         /* Here we do some unaligned reads and duplicate conversions, but
 895          * at least we have all the pixels */
 896         if( i_rewind )
 897         {
 898             p_y -= i_rewind;
 899             p_u -= i_rewind >> 1;
 900             p_v -= i_rewind >> 1;
 901             p_buffer -= i_rewind;
 902             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
 903             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
 904             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
 905             CONVERT_YUV_PIXEL(4);  CONVERT_Y_PIXEL(4);
 906         }
 907         SCALE_WIDTH;
 908         SCALE_HEIGHT( 420, 4 );
 909
 910         p_y += i_source_margin;
 911         if( i_y % 2 )
 912         {
 913             p_u += i_source_margin_c;
 914             p_v += i_source_margin_c;
 915         }
 916     }
 917 }
 918
 919 #else // defined (MODULE_NAME_IS_i420_rgb_mmx) || defined (MODULE_NAME_IS_i420_rgb_sse2)
 920
 921 void I420_A8R8G8B8( filter_t *p_filter, picture_t *p_src,
 922                                             picture_t *p_dest )
 923 {
 924     /* We got this one from the old arguments */
 925     uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
 926     uint8_t  *p_y   = p_src->Y_PIXELS;
 927     uint8_t  *p_u   = p_src->U_PIXELS;
 928     uint8_t  *p_v   = p_src->V_PIXELS;
 929
 930     bool  b_hscale;                         /* horizontal scaling type */
 931     unsigned int i_vscale;                          /* vertical scaling type */
 932     unsigned int i_x, i_y;                /* horizontal and vertical indexes */
 933
 934     int         i_right_margin;
 935     int         i_rewind;
 936     int         i_scale_count;                       /* scale modulo counter */
 937     int         i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
 938     uint32_t *  p_pic_start;       /* beginning of the current line for copy */
 939     /* Conversion buffer pointer */
 940     uint32_t *  p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
 941     uint32_t *  p_buffer;
 942
 943     /* Offset array pointer */
 944     int *       p_offset_start = p_filter->p_sys->p_offset;
 945     int *       p_offset;
 946
 947     const int i_source_margin = p_src->p[0].i_pitch
 948                                  - p_src->p[0].i_visible_pitch;
 949     const int i_source_margin_c = p_src->p[1].i_pitch
 950                                  - p_src->p[1].i_visible_pitch;
 951
 952     i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
 953
 954     /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
 955      * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
 956      * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
 957     SetOffset( p_filter->fmt_in.video.i_width,
 958                p_filter->fmt_in.video.i_height,
 959                p_filter->fmt_out.video.i_width,
 960                p_filter->fmt_out.video.i_height,
 961                &b_hscale, &i_vscale, p_offset_start );
 962
 963     /*
 964      * Perform conversion
 965      */
 966     i_scale_count = ( i_vscale == 1 ) ?
 967                     p_filter->fmt_out.video.i_height :
 968                     p_filter->fmt_in.video.i_height;
 969
 970 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
 971
 972     if( p_filter->fmt_in.video.i_width & 15 )
 973     {
 974         i_rewind = 16 - ( p_filter->fmt_in.video.i_width & 15 );
 975     }
 976     else
 977     {
 978         i_rewind = 0;
 979     }
 980
 981     /*
 982     ** SSE2 128 bits fetch/store instructions are faster
 983     ** if memory access is 16 bytes aligned
 984     */
 985
 986     p_buffer = b_hscale ? p_buffer_start : p_pic;
 987     if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
 988                     p_dest->p->i_pitch|
 989                     ((intptr_t)p_y)|
 990                     ((intptr_t)p_buffer))) )
 991     {
 992         /* use faster SSE2 aligned fetch and store */
 993         for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
 994         {
 995             p_pic_start = p_pic;
 996
 997             for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
 998             {
 999                 SSE2_CALL (
1000                     SSE2_INIT_32_ALIGNED
1001                     SSE2_YUV_MUL
1002                     SSE2_YUV_ADD
1003                     SSE2_UNPACK_32_ARGB_ALIGNED
1004                 );
1005                 p_y += 16;
1006                 p_u += 8;
1007                 p_v += 8;
1008                 p_buffer += 16;
1009             }
1010
1011             /* Here we do some unaligned reads and duplicate conversions, but
1012              * at least we have all the pixels */
1013             if( i_rewind )
1014             {
1015                 p_y -= i_rewind;
1016                 p_u -= i_rewind >> 1;
1017                 p_v -= i_rewind >> 1;
1018                 p_buffer -= i_rewind;
1019                 SSE2_CALL (
1020                     SSE2_INIT_32_UNALIGNED
1021                     SSE2_YUV_MUL
1022                     SSE2_YUV_ADD
1023                     SSE2_UNPACK_32_ARGB_UNALIGNED
1024                 );
1025                 p_y += 16;
1026                 p_u += 4;
1027                 p_v += 4;
1028             }
1029             SCALE_WIDTH;
1030             SCALE_HEIGHT( 420, 4 );
1031
1032             p_y += i_source_margin;
1033             if( i_y % 2 )
1034             {
1035                 p_u += i_source_margin_c;
1036                 p_v += i_source_margin_c;
1037             }
1038             p_buffer = b_hscale ? p_buffer_start : p_pic;
1039         }
1040     }
1041     else
1042     {
1043         /* use slower SSE2 unaligned fetch and store */
1044         for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1045         {
1046             p_pic_start = p_pic;
1047             p_buffer = b_hscale ? p_buffer_start : p_pic;
1048
1049             for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
1050             {
1051                 SSE2_CALL (
1052                     SSE2_INIT_32_UNALIGNED
1053                     SSE2_YUV_MUL
1054                     SSE2_YUV_ADD
1055                     SSE2_UNPACK_32_ARGB_UNALIGNED
1056                 );
1057                 p_y += 16;
1058                 p_u += 8;
1059                 p_v += 8;
1060                 p_buffer += 16;
1061             }
1062
1063             /* Here we do some unaligned reads and duplicate conversions, but
1064              * at least we have all the pixels */
1065             if( i_rewind )
1066             {
1067                 p_y -= i_rewind;
1068                 p_u -= i_rewind >> 1;
1069                 p_v -= i_rewind >> 1;
1070                 p_buffer -= i_rewind;
1071                 SSE2_CALL (
1072                     SSE2_INIT_32_UNALIGNED
1073                     SSE2_YUV_MUL
1074                     SSE2_YUV_ADD
1075                     SSE2_UNPACK_32_ARGB_UNALIGNED
1076                 );
1077                 p_y += 16;
1078                 p_u += 8;
1079                 p_v += 8;
1080             }
1081             SCALE_WIDTH;
1082             SCALE_HEIGHT( 420, 4 );
1083
1084             p_y += i_source_margin;
1085             if( i_y % 2 )
1086             {
1087                 p_u += i_source_margin_c;
1088                 p_v += i_source_margin_c;
1089             }
1090             p_buffer = b_hscale ? p_buffer_start : p_pic;
1091         }
1092     }
1093
1094     /* make sure all SSE2 stores are visible thereafter */
1095     SSE2_END;
1096
1097 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
1098
1099     if( p_filter->fmt_in.video.i_width & 7 )
1100     {
1101         i_rewind = 8 - ( p_filter->fmt_in.video.i_width & 7 );
1102     }
1103     else
1104     {
1105         i_rewind = 0;
1106     }
1107
1108     for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1109     {
1110         p_pic_start = p_pic;
1111         p_buffer = b_hscale ? p_buffer_start : p_pic;
1112
1113         for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
1114         {
1115             MMX_CALL (
1116                 MMX_INIT_32
1117                 MMX_YUV_MUL
1118                 MMX_YUV_ADD
1119                 MMX_UNPACK_32_ARGB
1120             );
1121             p_y += 8;
1122             p_u += 4;
1123             p_v += 4;
1124             p_buffer += 8;
1125         }
1126
1127         /* Here we do some unaligned reads and duplicate conversions, but
1128          * at least we have all the pixels */
1129         if( i_rewind )
1130         {
1131             p_y -= i_rewind;
1132             p_u -= i_rewind >> 1;
1133             p_v -= i_rewind >> 1;
1134             p_buffer -= i_rewind;
1135             MMX_CALL (
1136                 MMX_INIT_32
1137                 MMX_YUV_MUL
1138                 MMX_YUV_ADD
1139                 MMX_UNPACK_32_ARGB
1140             );
1141             p_y += 8;
1142             p_u += 4;
1143             p_v += 4;
1144             p_buffer += 8;
1145         }
1146         SCALE_WIDTH;
1147         SCALE_HEIGHT( 420, 4 );
1148
1149         p_y += i_source_margin;
1150         if( i_y % 2 )
1151         {
1152             p_u += i_source_margin_c;
1153             p_v += i_source_margin_c;
1154         }
1155     }
1156
1157     /* re-enable FPU registers */
1158     MMX_END;
1159
1160 #endif
1161 }
1162
1163 void I420_R8G8B8A8( filter_t *p_filter, picture_t *p_src,
1164                                             picture_t *p_dest )
1165 {
1166     /* We got this one from the old arguments */
1167     uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
1168     uint8_t  *p_y   = p_src->Y_PIXELS;
1169     uint8_t  *p_u   = p_src->U_PIXELS;
1170     uint8_t  *p_v   = p_src->V_PIXELS;
1171
1172     bool  b_hscale;                         /* horizontal scaling type */
1173     unsigned int i_vscale;                          /* vertical scaling type */
1174     unsigned int i_x, i_y;                /* horizontal and vertical indexes */
1175
1176     int         i_right_margin;
1177     int         i_rewind;
1178     int         i_scale_count;                       /* scale modulo counter */
1179     int         i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
1180     uint32_t *  p_pic_start;       /* beginning of the current line for copy */
1181     /* Conversion buffer pointer */
1182     uint32_t *  p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
1183     uint32_t *  p_buffer;
1184
1185     /* Offset array pointer */
1186     int *       p_offset_start = p_filter->p_sys->p_offset;
1187     int *       p_offset;
1188
1189     const int i_source_margin = p_src->p[0].i_pitch
1190                                  - p_src->p[0].i_visible_pitch;
1191     const int i_source_margin_c = p_src->p[1].i_pitch
1192                                  - p_src->p[1].i_visible_pitch;
1193
1194     i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
1195
1196     /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
1197      * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
1198      * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
1199     SetOffset( p_filter->fmt_in.video.i_width,
1200                p_filter->fmt_in.video.i_height,
1201                p_filter->fmt_out.video.i_width,
1202                p_filter->fmt_out.video.i_height,
1203                &b_hscale, &i_vscale, p_offset_start );
1204
1205     /*
1206      * Perform conversion
1207      */
1208     i_scale_count = ( i_vscale == 1 ) ?
1209                     p_filter->fmt_out.video.i_height :
1210                     p_filter->fmt_in.video.i_height;
1211
1212 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
1213
1214     if( p_filter->fmt_in.video.i_width & 15 )
1215     {
1216         i_rewind = 16 - ( p_filter->fmt_in.video.i_width & 15 );
1217     }
1218     else
1219     {
1220         i_rewind = 0;
1221     }
1222
1223     /*
1224     ** SSE2 128 bits fetch/store instructions are faster
1225     ** if memory access is 16 bytes aligned
1226     */
1227
1228     p_buffer = b_hscale ? p_buffer_start : p_pic;
1229     if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
1230                     p_dest->p->i_pitch|
1231                     ((intptr_t)p_y)|
1232                     ((intptr_t)p_buffer))) )
1233     {
1234         /* use faster SSE2 aligned fetch and store */
1235         for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1236         {
1237             p_pic_start = p_pic;
1238
1239             for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
1240             {
1241                 SSE2_CALL (
1242                     SSE2_INIT_32_ALIGNED
1243                     SSE2_YUV_MUL
1244                     SSE2_YUV_ADD
1245                     SSE2_UNPACK_32_RGBA_ALIGNED
1246                 );
1247                 p_y += 16;
1248                 p_u += 8;
1249                 p_v += 8;
1250                 p_buffer += 16;
1251             }
1252
1253             /* Here we do some unaligned reads and duplicate conversions, but
1254              * at least we have all the pixels */
1255             if( i_rewind )
1256             {
1257                 p_y -= i_rewind;
1258                 p_u -= i_rewind >> 1;
1259                 p_v -= i_rewind >> 1;
1260                 p_buffer -= i_rewind;
1261                 SSE2_CALL (
1262                     SSE2_INIT_32_UNALIGNED
1263                     SSE2_YUV_MUL
1264                     SSE2_YUV_ADD
1265                     SSE2_UNPACK_32_RGBA_UNALIGNED
1266                 );
1267                 p_y += 16;
1268                 p_u += 4;
1269                 p_v += 4;
1270             }
1271             SCALE_WIDTH;
1272             SCALE_HEIGHT( 420, 4 );
1273
1274             p_y += i_source_margin;
1275             if( i_y % 2 )
1276             {
1277                 p_u += i_source_margin_c;
1278                 p_v += i_source_margin_c;
1279             }
1280             p_buffer = b_hscale ? p_buffer_start : p_pic;
1281         }
1282     }
1283     else
1284     {
1285         /* use slower SSE2 unaligned fetch and store */
1286         for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1287         {
1288             p_pic_start = p_pic;
1289             p_buffer = b_hscale ? p_buffer_start : p_pic;
1290
1291             for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
1292             {
1293                 SSE2_CALL (
1294                     SSE2_INIT_32_UNALIGNED
1295                     SSE2_YUV_MUL
1296                     SSE2_YUV_ADD
1297                     SSE2_UNPACK_32_RGBA_UNALIGNED
1298                 );
1299                 p_y += 16;
1300                 p_u += 8;
1301                 p_v += 8;
1302                 p_buffer += 16;
1303             }
1304
1305             /* Here we do some unaligned reads and duplicate conversions, but
1306              * at least we have all the pixels */
1307             if( i_rewind )
1308             {
1309                 p_y -= i_rewind;
1310                 p_u -= i_rewind >> 1;
1311                 p_v -= i_rewind >> 1;
1312                 p_buffer -= i_rewind;
1313                 SSE2_CALL (
1314                     SSE2_INIT_32_UNALIGNED
1315                     SSE2_YUV_MUL
1316                     SSE2_YUV_ADD
1317                     SSE2_UNPACK_32_RGBA_UNALIGNED
1318                 );
1319                 p_y += 16;
1320                 p_u += 8;
1321                 p_v += 8;
1322             }
1323             SCALE_WIDTH;
1324             SCALE_HEIGHT( 420, 4 );
1325
1326             p_y += i_source_margin;
1327             if( i_y % 2 )
1328             {
1329                 p_u += i_source_margin_c;
1330                 p_v += i_source_margin_c;
1331             }
1332             p_buffer = b_hscale ? p_buffer_start : p_pic;
1333         }
1334     }
1335
1336     /* make sure all SSE2 stores are visible thereafter */
1337     SSE2_END;
1338
1339 #else // defined (MODULE_NAME_IS_i420_rgb_mmx)
1340
1341     if( p_filter->fmt_in.video.i_width & 7 )
1342     {
1343         i_rewind = 8 - ( p_filter->fmt_in.video.i_width & 7 );
1344     }
1345     else
1346     {
1347         i_rewind = 0;
1348     }
1349
1350     for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1351     {
1352         p_pic_start = p_pic;
1353         p_buffer = b_hscale ? p_buffer_start : p_pic;
1354
1355         for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
1356         {
1357             MMX_CALL (
1358                 MMX_INIT_32
1359                 MMX_YUV_MUL
1360                 MMX_YUV_ADD
1361                 MMX_UNPACK_32_RGBA
1362             );
1363             p_y += 8;
1364             p_u += 4;
1365             p_v += 4;
1366             p_buffer += 8;
1367         }
1368
1369         /* Here we do some unaligned reads and duplicate conversions, but
1370          * at least we have all the pixels */
1371         if( i_rewind )
1372         {
1373             p_y -= i_rewind;
1374             p_u -= i_rewind >> 1;
1375             p_v -= i_rewind >> 1;
1376             p_buffer -= i_rewind;
1377             MMX_CALL (
1378                 MMX_INIT_32
1379                 MMX_YUV_MUL
1380                 MMX_YUV_ADD
1381                 MMX_UNPACK_32_RGBA
1382             );
1383             p_y += 8;
1384             p_u += 4;
1385             p_v += 4;
1386             p_buffer += 8;
1387         }
1388         SCALE_WIDTH;
1389         SCALE_HEIGHT( 420, 4 );
1390
1391         p_y += i_source_margin;
1392         if( i_y % 2 )
1393         {
1394             p_u += i_source_margin_c;
1395             p_v += i_source_margin_c;
1396         }
1397     }
1398
1399     /* re-enable FPU registers */
1400     MMX_END;
1401
1402 #endif
1403 }
1404
1405 void I420_B8G8R8A8( filter_t *p_filter, picture_t *p_src,
1406                                             picture_t *p_dest )
1407 {
1408     /* We got this one from the old arguments */
1409     uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
1410     uint8_t  *p_y   = p_src->Y_PIXELS;
1411     uint8_t  *p_u   = p_src->U_PIXELS;
1412     uint8_t  *p_v   = p_src->V_PIXELS;
1413
1414     bool  b_hscale;                         /* horizontal scaling type */
1415     unsigned int i_vscale;                          /* vertical scaling type */
1416     unsigned int i_x, i_y;                /* horizontal and vertical indexes */
1417
1418     int         i_right_margin;
1419     int         i_rewind;
1420     int         i_scale_count;                       /* scale modulo counter */
1421     int         i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
1422     uint32_t *  p_pic_start;       /* beginning of the current line for copy */
1423     /* Conversion buffer pointer */
1424     uint32_t *  p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
1425     uint32_t *  p_buffer;
1426
1427     /* Offset array pointer */
1428     int *       p_offset_start = p_filter->p_sys->p_offset;
1429     int *       p_offset;
1430
1431     const int i_source_margin = p_src->p[0].i_pitch
1432                                  - p_src->p[0].i_visible_pitch;
1433     const int i_source_margin_c = p_src->p[1].i_pitch
1434                                  - p_src->p[1].i_visible_pitch;
1435
1436     i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
1437
1438     /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
1439      * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
1440      * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
1441     SetOffset( p_filter->fmt_in.video.i_width,
1442                p_filter->fmt_in.video.i_height,
1443                p_filter->fmt_out.video.i_width,
1444                p_filter->fmt_out.video.i_height,
1445                &b_hscale, &i_vscale, p_offset_start );
1446
1447     /*
1448      * Perform conversion
1449      */
1450     i_scale_count = ( i_vscale == 1 ) ?
1451                     p_filter->fmt_out.video.i_height :
1452                     p_filter->fmt_in.video.i_height;
1453
1454 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
1455
1456     if( p_filter->fmt_in.video.i_width & 15 )
1457     {
1458         i_rewind = 16 - ( p_filter->fmt_in.video.i_width & 15 );
1459     }
1460     else
1461     {
1462         i_rewind = 0;
1463     }
1464
1465     /*
1466     ** SSE2 128 bits fetch/store instructions are faster
1467     ** if memory access is 16 bytes aligned
1468     */
1469
1470     p_buffer = b_hscale ? p_buffer_start : p_pic;
1471     if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
1472                     p_dest->p->i_pitch|
1473                     ((intptr_t)p_y)|
1474                     ((intptr_t)p_buffer))) )
1475     {
1476         /* use faster SSE2 aligned fetch and store */
1477         for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1478         {
1479             p_pic_start = p_pic;
1480
1481             for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
1482             {
1483                 SSE2_CALL (
1484                     SSE2_INIT_32_ALIGNED
1485                     SSE2_YUV_MUL
1486                     SSE2_YUV_ADD
1487                     SSE2_UNPACK_32_BGRA_ALIGNED
1488                 );
1489                 p_y += 16;
1490                 p_u += 8;
1491                 p_v += 8;
1492                 p_buffer += 16;
1493             }
1494
1495             /* Here we do some unaligned reads and duplicate conversions, but
1496              * at least we have all the pixels */
1497             if( i_rewind )
1498             {
1499                 p_y -= i_rewind;
1500                 p_u -= i_rewind >> 1;
1501                 p_v -= i_rewind >> 1;
1502                 p_buffer -= i_rewind;
1503                 SSE2_CALL (
1504                     SSE2_INIT_32_UNALIGNED
1505                     SSE2_YUV_MUL
1506                     SSE2_YUV_ADD
1507                     SSE2_UNPACK_32_BGRA_UNALIGNED
1508                 );
1509                 p_y += 16;
1510                 p_u += 4;
1511                 p_v += 4;
1512             }
1513             SCALE_WIDTH;
1514             SCALE_HEIGHT( 420, 4 );
1515
1516             p_y += i_source_margin;
1517             if( i_y % 2 )
1518             {
1519                 p_u += i_source_margin_c;
1520                 p_v += i_source_margin_c;
1521             }
1522             p_buffer = b_hscale ? p_buffer_start : p_pic;
1523         }
1524     }
1525     else
1526     {
1527         /* use slower SSE2 unaligned fetch and store */
1528         for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1529         {
1530             p_pic_start = p_pic;
1531             p_buffer = b_hscale ? p_buffer_start : p_pic;
1532
1533             for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
1534             {
1535                 SSE2_CALL (
1536                     SSE2_INIT_32_UNALIGNED
1537                     SSE2_YUV_MUL
1538                     SSE2_YUV_ADD
1539                     SSE2_UNPACK_32_BGRA_UNALIGNED
1540                 );
1541                 p_y += 16;
1542                 p_u += 8;
1543                 p_v += 8;
1544                 p_buffer += 16;
1545             }
1546
1547             /* Here we do some unaligned reads and duplicate conversions, but
1548              * at least we have all the pixels */
1549             if( i_rewind )
1550             {
1551                 p_y -= i_rewind;
1552                 p_u -= i_rewind >> 1;
1553                 p_v -= i_rewind >> 1;
1554                 p_buffer -= i_rewind;
1555                 SSE2_CALL (
1556                     SSE2_INIT_32_UNALIGNED
1557                     SSE2_YUV_MUL
1558                     SSE2_YUV_ADD
1559                     SSE2_UNPACK_32_BGRA_UNALIGNED
1560                 );
1561                 p_y += 16;
1562                 p_u += 8;
1563                 p_v += 8;
1564             }
1565             SCALE_WIDTH;
1566             SCALE_HEIGHT( 420, 4 );
1567
1568             p_y += i_source_margin;
1569             if( i_y % 2 )
1570             {
1571                 p_u += i_source_margin_c;
1572                 p_v += i_source_margin_c;
1573             }
1574             p_buffer = b_hscale ? p_buffer_start : p_pic;
1575         }
1576     }
1577
1578 #else
1579
1580     if( p_filter->fmt_in.video.i_width & 7 )
1581     {
1582         i_rewind = 8 - ( p_filter->fmt_in.video.i_width & 7 );
1583     }
1584     else
1585     {
1586         i_rewind = 0;
1587     }
1588
1589     for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1590     {
1591         p_pic_start = p_pic;
1592         p_buffer = b_hscale ? p_buffer_start : p_pic;
1593
1594         for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
1595         {
1596             MMX_CALL (
1597                 MMX_INIT_32
1598                 MMX_YUV_MUL
1599                 MMX_YUV_ADD
1600                 MMX_UNPACK_32_BGRA
1601             );
1602             p_y += 8;
1603             p_u += 4;
1604             p_v += 4;
1605             p_buffer += 8;
1606         }
1607
1608         /* Here we do some unaligned reads and duplicate conversions, but
1609          * at least we have all the pixels */
1610         if( i_rewind )
1611         {
1612             p_y -= i_rewind;
1613             p_u -= i_rewind >> 1;
1614             p_v -= i_rewind >> 1;
1615             p_buffer -= i_rewind;
1616             MMX_CALL (
1617                 MMX_INIT_32
1618                 MMX_YUV_MUL
1619                 MMX_YUV_ADD
1620                 MMX_UNPACK_32_BGRA
1621             );
1622             p_y += 8;
1623             p_u += 4;
1624             p_v += 4;
1625             p_buffer += 8;
1626         }
1627         SCALE_WIDTH;
1628         SCALE_HEIGHT( 420, 4 );
1629
1630         p_y += i_source_margin;
1631         if( i_y % 2 )
1632         {
1633             p_u += i_source_margin_c;
1634             p_v += i_source_margin_c;
1635         }
1636     }
1637
1638     /* re-enable FPU registers */
1639     MMX_END;
1640
1641 #endif
1642 }
1643
1644 void I420_A8B8G8R8( filter_t *p_filter, picture_t *p_src,
1645                                             picture_t *p_dest )
1646 {
1647     /* We got this one from the old arguments */
1648     uint32_t *p_pic = (uint32_t*)p_dest->p->p_pixels;
1649     uint8_t  *p_y   = p_src->Y_PIXELS;
1650     uint8_t  *p_u   = p_src->U_PIXELS;
1651     uint8_t  *p_v   = p_src->V_PIXELS;
1652
1653     bool  b_hscale;                         /* horizontal scaling type */
1654     unsigned int i_vscale;                          /* vertical scaling type */
1655     unsigned int i_x, i_y;                /* horizontal and vertical indexes */
1656
1657     int         i_right_margin;
1658     int         i_rewind;
1659     int         i_scale_count;                       /* scale modulo counter */
1660     int         i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */
1661     uint32_t *  p_pic_start;       /* beginning of the current line for copy */
1662     /* Conversion buffer pointer */
1663     uint32_t *  p_buffer_start = (uint32_t*)p_filter->p_sys->p_buffer;
1664     uint32_t *  p_buffer;
1665
1666     /* Offset array pointer */
1667     int *       p_offset_start = p_filter->p_sys->p_offset;
1668     int *       p_offset;
1669
1670     const int i_source_margin = p_src->p[0].i_pitch
1671                                  - p_src->p[0].i_visible_pitch;
1672     const int i_source_margin_c = p_src->p[1].i_pitch
1673                                  - p_src->p[1].i_visible_pitch;
1674
1675     i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch;
1676
1677     /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered
1678      * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1'
1679      * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */
1680     SetOffset( p_filter->fmt_in.video.i_width,
1681                p_filter->fmt_in.video.i_height,
1682                p_filter->fmt_out.video.i_width,
1683                p_filter->fmt_out.video.i_height,
1684                &b_hscale, &i_vscale, p_offset_start );
1685
1686     /*
1687      * Perform conversion
1688      */
1689     i_scale_count = ( i_vscale == 1 ) ?
1690                     p_filter->fmt_out.video.i_height :
1691                     p_filter->fmt_in.video.i_height;
1692
1693 #if defined (MODULE_NAME_IS_i420_rgb_sse2)
1694
1695     if( p_filter->fmt_in.video.i_width & 15 )
1696     {
1697         i_rewind = 16 - ( p_filter->fmt_in.video.i_width & 15 );
1698     }
1699     else
1700     {
1701         i_rewind = 0;
1702     }
1703
1704     /*
1705     ** SSE2 128 bits fetch/store instructions are faster
1706     ** if memory access is 16 bytes aligned
1707     */
1708
1709     p_buffer = b_hscale ? p_buffer_start : p_pic;
1710     if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch|
1711                     p_dest->p->i_pitch|
1712                     ((intptr_t)p_y)|
1713                     ((intptr_t)p_buffer))) )
1714     {
1715         /* use faster SSE2 aligned fetch and store */
1716         for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1717         {
1718             p_pic_start = p_pic;
1719
1720             for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
1721             {
1722                 SSE2_CALL (
1723                     SSE2_INIT_32_ALIGNED
1724                     SSE2_YUV_MUL
1725                     SSE2_YUV_ADD
1726                     SSE2_UNPACK_32_ABGR_ALIGNED
1727                 );
1728                 p_y += 16;
1729                 p_u += 8;
1730                 p_v += 8;
1731                 p_buffer += 16;
1732             }
1733
1734             /* Here we do some unaligned reads and duplicate conversions, but
1735              * at least we have all the pixels */
1736             if( i_rewind )
1737             {
1738                 p_y -= i_rewind;
1739                 p_u -= i_rewind >> 1;
1740                 p_v -= i_rewind >> 1;
1741                 p_buffer -= i_rewind;
1742                 SSE2_CALL (
1743                     SSE2_INIT_32_UNALIGNED
1744                     SSE2_YUV_MUL
1745                     SSE2_YUV_ADD
1746                     SSE2_UNPACK_32_ABGR_UNALIGNED
1747                 );
1748                 p_y += 16;
1749                 p_u += 4;
1750                 p_v += 4;
1751             }
1752             SCALE_WIDTH;
1753             SCALE_HEIGHT( 420, 4 );
1754
1755             p_y += i_source_margin;
1756             if( i_y % 2 )
1757             {
1758                 p_u += i_source_margin_c;
1759                 p_v += i_source_margin_c;
1760             }
1761             p_buffer = b_hscale ? p_buffer_start : p_pic;
1762         }
1763     }
1764     else
1765     {
1766         /* use slower SSE2 unaligned fetch and store */
1767         for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1768         {
1769             p_pic_start = p_pic;
1770             p_buffer = b_hscale ? p_buffer_start : p_pic;
1771
1772             for ( i_x = p_filter->fmt_in.video.i_width / 16; i_x--; )
1773             {
1774                 SSE2_CALL (
1775                     SSE2_INIT_32_UNALIGNED
1776                     SSE2_YUV_MUL
1777                     SSE2_YUV_ADD
1778                     SSE2_UNPACK_32_ABGR_UNALIGNED
1779                 );
1780                 p_y += 16;
1781                 p_u += 8;
1782                 p_v += 8;
1783                 p_buffer += 16;
1784             }
1785
1786             /* Here we do some unaligned reads and duplicate conversions, but
1787              * at least we have all the pixels */
1788             if( i_rewind )
1789             {
1790                 p_y -= i_rewind;
1791                 p_u -= i_rewind >> 1;
1792                 p_v -= i_rewind >> 1;
1793                 p_buffer -= i_rewind;
1794                 SSE2_CALL (
1795                     SSE2_INIT_32_UNALIGNED
1796                     SSE2_YUV_MUL
1797                     SSE2_YUV_ADD
1798                     SSE2_UNPACK_32_ABGR_UNALIGNED
1799                 );
1800                 p_y += 16;
1801                 p_u += 8;
1802                 p_v += 8;
1803             }
1804             SCALE_WIDTH;
1805             SCALE_HEIGHT( 420, 4 );
1806
1807             p_y += i_source_margin;
1808             if( i_y % 2 )
1809             {
1810                 p_u += i_source_margin_c;
1811                 p_v += i_source_margin_c;
1812             }
1813             p_buffer = b_hscale ? p_buffer_start : p_pic;
1814         }
1815     }
1816
1817 #else
1818
1819     if( p_filter->fmt_in.video.i_width & 7 )
1820     {
1821         i_rewind = 8 - ( p_filter->fmt_in.video.i_width & 7 );
1822     }
1823     else
1824     {
1825         i_rewind = 0;
1826     }
1827
1828     for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ )
1829     {
1830         p_pic_start = p_pic;
1831         p_buffer = b_hscale ? p_buffer_start : p_pic;
1832
1833         for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; )
1834         {
1835             MMX_CALL (
1836                 MMX_INIT_32
1837                 MMX_YUV_MUL
1838                 MMX_YUV_ADD
1839                 MMX_UNPACK_32_ABGR
1840             );
1841             p_y += 8;
1842             p_u += 4;
1843             p_v += 4;
1844             p_buffer += 8;
1845         }
1846
1847         /* Here we do some unaligned reads and duplicate conversions, but
1848          * at least we have all the pixels */
1849         if( i_rewind )
1850         {
1851             p_y -= i_rewind;
1852             p_u -= i_rewind >> 1;
1853             p_v -= i_rewind >> 1;
1854             p_buffer -= i_rewind;
1855             MMX_CALL (
1856                 MMX_INIT_32
1857                 MMX_YUV_MUL
1858                 MMX_YUV_ADD
1859                 MMX_UNPACK_32_ABGR
1860             );
1861             p_y += 8;
1862             p_u += 4;
1863             p_v += 4;
1864             p_buffer += 8;
1865         }
1866         SCALE_WIDTH;
1867         SCALE_HEIGHT( 420, 4 );
1868
1869         p_y += i_source_margin;
1870         if( i_y % 2 )
1871         {
1872             p_u += i_source_margin_c;
1873             p_v += i_source_margin_c;
1874         }
1875     }
1876
1877     /* re-enable FPU registers */
1878     MMX_END;
1879
1880 #endif
1881 }
1882
1883 #endif
1884
1885 /* Following functions are local */
1886
/*****************************************************************************
 * SetOffset: fill the horizontal offset table and report scaling modes
 *****************************************************************************
 * Compares the source size (i_width x i_height) with the destination size
 * (i_pic_width x i_pic_height).
 *
 * Horizontal:
 *  - equal widths: *pb_hscale is cleared and p_offset is left untouched;
 *  - wider destination: *pb_hscale is set and p_offset receives i_pic_width
 *    entries, each 0 or 1, of which exactly i_width are 1;
 *  - narrower destination: *pb_hscale is set and p_offset receives
 *    i_pic_width entries, each >= 1, whose sum is i_width.
 * These entry counts hold for strictly positive widths. No bounds checking
 * is performed on p_offset: the caller must supply a large enough array.
 *
 * Vertical: *pi_vscale is set to 0 for equal heights, to 1 when the
 * destination is taller, and to -1 (converted to unsigned int) when it is
 * shorter.
 *****************************************************************************/
static void SetOffset( int i_width, int i_height, int i_pic_width,
                       int i_pic_height, bool *pb_hscale,
                       unsigned int *pi_vscale, int *p_offset )
{
    const int i_delta_w = i_pic_width - i_width;
    const int i_delta_h = i_pic_height - i_height;

    /*
     * Horizontal part: any width difference requires the offset table
     */
    *pb_hscale = ( i_delta_w != 0 );

    if( i_delta_w > 0 )
    {
        /* Extension: walk the source pixels, emitting a run of 0 entries
         * followed by a single 1 for each of them. */
        int i_error = i_pic_width;                  /* running error term */
        int i_remaining = i_width;

        while( i_remaining-- != 0 )
        {
            for( i_error -= i_width; i_error > 0; i_error -= i_width )
            {
                *p_offset++ = 0;
            }
            *p_offset++ = 1;
            i_error += i_pic_width;
        }
    }
    else if( i_delta_w < 0 )
    {
        /* Reduction: walk the destination pixels, each entry counting how
         * many source pixels it accounts for (at least one). */
        int i_error = i_width;                      /* running error term */
        int i_remaining = i_pic_width;

        while( i_remaining-- != 0 )
        {
            int i_step = 1;

            for( i_error -= i_pic_width; i_error > 0;
                 i_error -= i_pic_width )
            {
                i_step += 1;
            }
            *p_offset++ = i_step;
            i_error += i_width;
        }
    }
    /* else: identical widths, the table is not needed and stays untouched */

    /*
     * Vertical part: only the direction of the scaling is reported
     */
    if( i_delta_h == 0 )
    {
        *pi_vscale = 0;
    }
    else
    {
        /* -1 is stored through an unsigned pointer on purpose; the
         * conversion is made explicit here. */
        *pi_vscale = ( i_delta_h > 0 ) ? 1u : (unsigned int)-1;
    }
}
1956