vp8/common/filter.c

   1 /*
   2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11
  12 #include "filter.h"
  13 #include "./vp8_rtcd.h"
  14
  15 DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
  16 {
  17     { 128,   0 },
  18     { 112,  16 },
  19     {  96,  32 },
  20     {  80,  48 },
  21     {  64,  64 },
  22     {  48,  80 },
  23     {  32,  96 },
  24     {  16, 112 }
  25 };
  26
  27 DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) =
  28 {
  29
  30     { 0,  0,  128,    0,   0,  0 },         /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
  31     { 0, -6,  123,   12,  -1,  0 },
  32     { 2, -11, 108,   36,  -8,  1 },         /* New 1/4 pel 6 tap filter */
  33     { 0, -9,   93,   50,  -6,  0 },
  34     { 3, -16,  77,   77, -16,  3 },         /* New 1/2 pel 6 tap filter */
  35     { 0, -6,   50,   93,  -9,  0 },
  36     { 1, -8,   36,  108, -11,  2 },         /* New 1/4 pel 6 tap filter */
  37     { 0, -1,   12,  123,  -6,  0 },
  38 };
  39
  40 static void filter_block2d_first_pass
  41 (
  42     unsigned char *src_ptr,
  43     int *output_ptr,
  44     unsigned int src_pixels_per_line,
  45     unsigned int pixel_step,
  46     unsigned int output_height,
  47     unsigned int output_width,
  48     const short *vp8_filter
  49 )
  50 {
  51     unsigned int i, j;
  52     int  Temp;
  53
  54     for (i = 0; i < output_height; i++)
  55     {
  56         for (j = 0; j < output_width; j++)
  57         {
  58             Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
  59                    ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
  60                    ((int)src_ptr[0]                 * vp8_filter[2]) +
  61                    ((int)src_ptr[pixel_step]         * vp8_filter[3]) +
  62                    ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) +
  63                    ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) +
  64                    (VP8_FILTER_WEIGHT >> 1);      /* Rounding */
  65
  66             /* Normalize back to 0-255 */
  67             Temp = Temp >> VP8_FILTER_SHIFT;
  68
  69             if (Temp < 0)
  70                 Temp = 0;
  71             else if (Temp > 255)
  72                 Temp = 255;
  73
  74             output_ptr[j] = Temp;
  75             src_ptr++;
  76         }
  77
  78         /* Next row... */
  79         src_ptr    += src_pixels_per_line - output_width;
  80         output_ptr += output_width;
  81     }
  82 }
  83
  84 static void filter_block2d_second_pass
  85 (
  86     int *src_ptr,
  87     unsigned char *output_ptr,
  88     int output_pitch,
  89     unsigned int src_pixels_per_line,
  90     unsigned int pixel_step,
  91     unsigned int output_height,
  92     unsigned int output_width,
  93     const short *vp8_filter
  94 )
  95 {
  96     unsigned int i, j;
  97     int  Temp;
  98
  99     for (i = 0; i < output_height; i++)
 100     {
 101         for (j = 0; j < output_width; j++)
 102         {
 103             /* Apply filter */
 104             Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
 105                    ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
 106                    ((int)src_ptr[0]                 * vp8_filter[2]) +
 107                    ((int)src_ptr[pixel_step]         * vp8_filter[3]) +
 108                    ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) +
 109                    ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) +
 110                    (VP8_FILTER_WEIGHT >> 1);   /* Rounding */
 111
 112             /* Normalize back to 0-255 */
 113             Temp = Temp >> VP8_FILTER_SHIFT;
 114
 115             if (Temp < 0)
 116                 Temp = 0;
 117             else if (Temp > 255)
 118                 Temp = 255;
 119
 120             output_ptr[j] = (unsigned char)Temp;
 121             src_ptr++;
 122         }
 123
 124         /* Start next row */
 125         src_ptr    += src_pixels_per_line - output_width;
 126         output_ptr += output_pitch;
 127     }
 128 }
 129
 130
 131 static void filter_block2d
 132 (
 133     unsigned char  *src_ptr,
 134     unsigned char  *output_ptr,
 135     unsigned int src_pixels_per_line,
 136     int output_pitch,
 137     const short  *HFilter,
 138     const short  *VFilter
 139 )
 140 {
 141     int FData[9*4]; /* Temp data buffer used in filtering */
 142
 143     /* First filter 1-D horizontally... */
 144     filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
 145
 146     /* then filter verticaly... */
 147     filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
 148 }
 149
 150
 151 void vp8_sixtap_predict4x4_c
 152 (
 153     unsigned char  *src_ptr,
 154     int   src_pixels_per_line,
 155     int  xoffset,
 156     int  yoffset,
 157     unsigned char *dst_ptr,
 158     int dst_pitch
 159 )
 160 {
 161     const short  *HFilter;
 162     const short  *VFilter;
 163
 164     HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
 165     VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
 166
 167     filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
 168 }
 169 void vp8_sixtap_predict8x8_c
 170 (
 171     unsigned char  *src_ptr,
 172     int  src_pixels_per_line,
 173     int  xoffset,
 174     int  yoffset,
 175     unsigned char *dst_ptr,
 176     int  dst_pitch
 177 )
 178 {
 179     const short  *HFilter;
 180     const short  *VFilter;
 181     int FData[13*16];   /* Temp data buffer used in filtering */
 182
 183     HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
 184     VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
 185
 186     /* First filter 1-D horizontally... */
 187     filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
 188
 189
 190     /* then filter verticaly... */
 191     filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
 192
 193 }
 194
 195 void vp8_sixtap_predict8x4_c
 196 (
 197     unsigned char  *src_ptr,
 198     int  src_pixels_per_line,
 199     int  xoffset,
 200     int  yoffset,
 201     unsigned char *dst_ptr,
 202     int  dst_pitch
 203 )
 204 {
 205     const short  *HFilter;
 206     const short  *VFilter;
 207     int FData[13*16];   /* Temp data buffer used in filtering */
 208
 209     HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
 210     VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
 211
 212     /* First filter 1-D horizontally... */
 213     filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
 214
 215
 216     /* then filter verticaly... */
 217     filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
 218
 219 }
 220
 221 void vp8_sixtap_predict16x16_c
 222 (
 223     unsigned char  *src_ptr,
 224     int  src_pixels_per_line,
 225     int  xoffset,
 226     int  yoffset,
 227     unsigned char *dst_ptr,
 228     int  dst_pitch
 229 )
 230 {
 231     const short  *HFilter;
 232     const short  *VFilter;
 233     int FData[21*24];   /* Temp data buffer used in filtering */
 234
 235
 236     HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
 237     VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
 238
 239     /* First filter 1-D horizontally... */
 240     filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
 241
 242     /* then filter verticaly... */
 243     filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
 244
 245 }
 246
 247
 248 /****************************************************************************
 249  *
 250  *  ROUTINE       : filter_block2d_bil_first_pass
 251  *
 252  *  INPUTS        : UINT8  *src_ptr    : Pointer to source block.
 253  *                  UINT32  src_stride : Stride of source block.
 254  *                  UINT32  height     : Block height.
 255  *                  UINT32  width      : Block width.
 256  *                  INT32  *vp8_filter : Array of 2 bi-linear filter taps.
 257  *
 258  *  OUTPUTS       : INT32  *dst_ptr    : Pointer to filtered block.
 259  *
 260  *  RETURNS       : void
 261  *
 262  *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block
 263  *                  in the horizontal direction to produce the filtered output
 264  *                  block. Used to implement first-pass of 2-D separable filter.
 265  *
 266  *  SPECIAL NOTES : Produces INT32 output to retain precision for next pass.
 267  *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
 268  *
 269  ****************************************************************************/
 270 static void filter_block2d_bil_first_pass
 271 (
 272     unsigned char  *src_ptr,
 273     unsigned short *dst_ptr,
 274     unsigned int    src_stride,
 275     unsigned int    height,
 276     unsigned int    width,
 277     const short    *vp8_filter
 278 )
 279 {
 280     unsigned int i, j;
 281
 282     for (i = 0; i < height; i++)
 283     {
 284         for (j = 0; j < width; j++)
 285         {
 286             /* Apply bilinear filter */
 287             dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
 288                           ((int)src_ptr[1] * vp8_filter[1]) +
 289                           (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
 290             src_ptr++;
 291         }
 292
 293         /* Next row... */
 294         src_ptr += src_stride - width;
 295         dst_ptr += width;
 296     }
 297 }
 298
 299 /****************************************************************************
 300  *
 301  *  ROUTINE       : filter_block2d_bil_second_pass
 302  *
 303  *  INPUTS        : INT32  *src_ptr    : Pointer to source block.
 304  *                  UINT32  dst_pitch  : Destination block pitch.
 305  *                  UINT32  height     : Block height.
 306  *                  UINT32  width      : Block width.
 307  *                  INT32  *vp8_filter : Array of 2 bi-linear filter taps.
 308  *
 309  *  OUTPUTS       : UINT16 *dst_ptr    : Pointer to filtered block.
 310  *
 311  *  RETURNS       : void
 312  *
 313  *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block
 314  *                  in the vertical direction to produce the filtered output
 315  *                  block. Used to implement second-pass of 2-D separable filter.
 316  *
 317  *  SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass.
 318  *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
 319  *
 320  ****************************************************************************/
 321 static void filter_block2d_bil_second_pass
 322 (
 323     unsigned short *src_ptr,
 324     unsigned char  *dst_ptr,
 325     int             dst_pitch,
 326     unsigned int    height,
 327     unsigned int    width,
 328     const short    *vp8_filter
 329 )
 330 {
 331     unsigned int  i, j;
 332     int  Temp;
 333
 334     for (i = 0; i < height; i++)
 335     {
 336         for (j = 0; j < width; j++)
 337         {
 338             /* Apply filter */
 339             Temp = ((int)src_ptr[0]     * vp8_filter[0]) +
 340                    ((int)src_ptr[width] * vp8_filter[1]) +
 341                    (VP8_FILTER_WEIGHT / 2);
 342             dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
 343             src_ptr++;
 344         }
 345
 346         /* Next row... */
 347         dst_ptr += dst_pitch;
 348     }
 349 }
 350
 351
 352 /****************************************************************************
 353  *
 354  *  ROUTINE       : filter_block2d_bil
 355  *
 356  *  INPUTS        : UINT8  *src_ptr          : Pointer to source block.
 357  *                  UINT32  src_pitch        : Stride of source block.
 358  *                  UINT32  dst_pitch        : Stride of destination block.
 359  *                  INT32  *HFilter          : Array of 2 horizontal filter taps.
 360  *                  INT32  *VFilter          : Array of 2 vertical filter taps.
 361  *                  INT32  Width             : Block width
 362  *                  INT32  Height            : Block height
 363  *
 364  *  OUTPUTS       : UINT16 *dst_ptr       : Pointer to filtered block.
 365  *
 366  *  RETURNS       : void
 367  *
 368  *  FUNCTION      : 2-D filters an input block by applying a 2-tap
 369  *                  bi-linear filter horizontally followed by a 2-tap
 370  *                  bi-linear filter vertically on the result.
 371  *
 372  *  SPECIAL NOTES : The largest block size can be handled here is 16x16
 373  *
 374  ****************************************************************************/
 375 static void filter_block2d_bil
 376 (
 377     unsigned char *src_ptr,
 378     unsigned char *dst_ptr,
 379     unsigned int   src_pitch,
 380     unsigned int   dst_pitch,
 381     const short   *HFilter,
 382     const short   *VFilter,
 383     int            Width,
 384     int            Height
 385 )
 386 {
 387
 388     unsigned short FData[17*16];    /* Temp data buffer used in filtering */
 389
 390     /* First filter 1-D horizontally... */
 391     filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
 392
 393     /* then 1-D vertically... */
 394     filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
 395 }
 396
 397
 398 void vp8_bilinear_predict4x4_c
 399 (
 400     unsigned char  *src_ptr,
 401     int   src_pixels_per_line,
 402     int  xoffset,
 403     int  yoffset,
 404     unsigned char *dst_ptr,
 405     int dst_pitch
 406 )
 407 {
 408     const short *HFilter;
 409     const short *VFilter;
 410
 411     HFilter = vp8_bilinear_filters[xoffset];
 412     VFilter = vp8_bilinear_filters[yoffset];
 413 #if 0
 414     {
 415         int i;
 416         unsigned char temp1[16];
 417         unsigned char temp2[16];
 418
 419         bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
 420         filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
 421
 422         for (i = 0; i < 16; i++)
 423         {
 424             if (temp1[i] != temp2[i])
 425             {
 426                 bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
 427                 filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
 428             }
 429         }
 430     }
 431 #endif
 432     filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
 433
 434 }
 435
 436 void vp8_bilinear_predict8x8_c
 437 (
 438     unsigned char  *src_ptr,
 439     int  src_pixels_per_line,
 440     int  xoffset,
 441     int  yoffset,
 442     unsigned char *dst_ptr,
 443     int  dst_pitch
 444 )
 445 {
 446     const short *HFilter;
 447     const short *VFilter;
 448
 449     HFilter = vp8_bilinear_filters[xoffset];
 450     VFilter = vp8_bilinear_filters[yoffset];
 451
 452     filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
 453
 454 }
 455
 456 void vp8_bilinear_predict8x4_c
 457 (
 458     unsigned char  *src_ptr,
 459     int  src_pixels_per_line,
 460     int  xoffset,
 461     int  yoffset,
 462     unsigned char *dst_ptr,
 463     int  dst_pitch
 464 )
 465 {
 466     const short *HFilter;
 467     const short *VFilter;
 468
 469     HFilter = vp8_bilinear_filters[xoffset];
 470     VFilter = vp8_bilinear_filters[yoffset];
 471
 472     filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
 473
 474 }
 475
 476 void vp8_bilinear_predict16x16_c
 477 (
 478     unsigned char  *src_ptr,
 479     int  src_pixels_per_line,
 480     int  xoffset,
 481     int  yoffset,
 482     unsigned char *dst_ptr,
 483     int  dst_pitch
 484 )
 485 {
 486     const short *HFilter;
 487     const short *VFilter;
 488
 489     HFilter = vp8_bilinear_filters[xoffset];
 490     VFilter = vp8_bilinear_filters[yoffset];
 491
 492     filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
 493 }