gfx/2d/SkConvolver.cpp

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
   3 // Copyright (c) 2011-2016 Google Inc.
   4 // Use of this source code is governed by a BSD-style license that can be
   5 // found in the gfx/skia/LICENSE file.
   6
   7 #include "SkConvolver.h"
   8
   9 #ifdef USE_SSE2
  10 #  include "mozilla/SSE.h"
  11 #endif
  12
  13 #ifdef USE_NEON
  14 #  include "mozilla/arm.h"
  15 #endif
  16
  17 namespace skia {
  18
  19 // Converts the argument to an 8-bit unsigned value by clamping to the range
  20 // 0-255.
  21 static inline unsigned char ClampTo8(int a) {
  22   if (static_cast<unsigned>(a) < 256) {
  23     return a;  // Avoid the extra check in the common case.
  24   }
  25   if (a < 0) {
  26     return 0;
  27   }
  28   return 255;
  29 }
  30
  31 // Convolves horizontally along a single row. The row data is given in
  32 // |srcData| and continues for the numValues() of the filter.
  33 template <bool hasAlpha>
  34 void ConvolveHorizontally(const unsigned char* srcData,
  35                           const SkConvolutionFilter1D& filter,
  36                           unsigned char* outRow) {
  37   // Loop over each pixel on this row in the output image.
  38   int numValues = filter.numValues();
  39   for (int outX = 0; outX < numValues; outX++) {
  40     // Get the filter that determines the current output pixel.
  41     int filterOffset, filterLength;
  42     const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
  43         filter.FilterForValue(outX, &filterOffset, &filterLength);
  44
  45     // Compute the first pixel in this row that the filter affects. It will
  46     // touch |filterLength| pixels (4 bytes each) after this.
  47     const unsigned char* rowToFilter = &srcData[filterOffset * 4];
  48
  49     // Apply the filter to the row to get the destination pixel in |accum|.
  50     int accum[4] = {0};
  51     for (int filterX = 0; filterX < filterLength; filterX++) {
  52       SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterX];
  53       accum[0] += curFilter * rowToFilter[filterX * 4 + 0];
  54       accum[1] += curFilter * rowToFilter[filterX * 4 + 1];
  55       accum[2] += curFilter * rowToFilter[filterX * 4 + 2];
  56       if (hasAlpha) {
  57         accum[3] += curFilter * rowToFilter[filterX * 4 + 3];
  58       }
  59     }
  60
  61     // Bring this value back in range. All of the filter scaling factors
  62     // are in fixed point with kShiftBits bits of fractional part.
  63     accum[0] >>= SkConvolutionFilter1D::kShiftBits;
  64     accum[1] >>= SkConvolutionFilter1D::kShiftBits;
  65     accum[2] >>= SkConvolutionFilter1D::kShiftBits;
  66
  67     if (hasAlpha) {
  68       accum[3] >>= SkConvolutionFilter1D::kShiftBits;
  69     }
  70
  71     // Store the new pixel.
  72     outRow[outX * 4 + 0] = ClampTo8(accum[0]);
  73     outRow[outX * 4 + 1] = ClampTo8(accum[1]);
  74     outRow[outX * 4 + 2] = ClampTo8(accum[2]);
  75     if (hasAlpha) {
  76       outRow[outX * 4 + 3] = ClampTo8(accum[3]);
  77     }
  78   }
  79 }
  80
  81 // Does vertical convolution to produce one output row. The filter values and
  82 // length are given in the first two parameters. These are applied to each
  83 // of the rows pointed to in the |sourceDataRows| array, with each row
  84 // being |pixelWidth| wide.
  85 //
  86 // The output must have room for |pixelWidth * 4| bytes.
  87 template <bool hasAlpha>
  88 void ConvolveVertically(
  89     const SkConvolutionFilter1D::ConvolutionFixed* filterValues,
  90     int filterLength, unsigned char* const* sourceDataRows, int pixelWidth,
  91     unsigned char* outRow) {
  92   // We go through each column in the output and do a vertical convolution,
  93   // generating one output pixel each time.
  94   for (int outX = 0; outX < pixelWidth; outX++) {
  95     // Compute the number of bytes over in each row that the current column
  96     // we're convolving starts at. The pixel will cover the next 4 bytes.
  97     int byteOffset = outX * 4;
  98
  99     // Apply the filter to one column of pixels.
 100     int accum[4] = {0};
 101     for (int filterY = 0; filterY < filterLength; filterY++) {
 102       SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterY];
 103       accum[0] += curFilter * sourceDataRows[filterY][byteOffset + 0];
 104       accum[1] += curFilter * sourceDataRows[filterY][byteOffset + 1];
 105       accum[2] += curFilter * sourceDataRows[filterY][byteOffset + 2];
 106       if (hasAlpha) {
 107         accum[3] += curFilter * sourceDataRows[filterY][byteOffset + 3];
 108       }
 109     }
 110
 111     // Bring this value back in range. All of the filter scaling factors
 112     // are in fixed point with kShiftBits bits of precision.
 113     accum[0] >>= SkConvolutionFilter1D::kShiftBits;
 114     accum[1] >>= SkConvolutionFilter1D::kShiftBits;
 115     accum[2] >>= SkConvolutionFilter1D::kShiftBits;
 116     if (hasAlpha) {
 117       accum[3] >>= SkConvolutionFilter1D::kShiftBits;
 118     }
 119
 120     // Store the new pixel.
 121     outRow[byteOffset + 0] = ClampTo8(accum[0]);
 122     outRow[byteOffset + 1] = ClampTo8(accum[1]);
 123     outRow[byteOffset + 2] = ClampTo8(accum[2]);
 124
 125     if (hasAlpha) {
 126       unsigned char alpha = ClampTo8(accum[3]);
 127
 128       // Make sure the alpha channel doesn't come out smaller than any of the
 129       // color channels. We use premultipled alpha channels, so this should
 130       // never happen, but rounding errors will cause this from time to time.
 131       // These "impossible" colors will cause overflows (and hence random pixel
 132       // values) when the resulting bitmap is drawn to the screen.
 133       //
 134       // We only need to do this when generating the final output row (here).
 135       int maxColorChannel =
 136           std::max(outRow[byteOffset + 0],
 137                    std::max(outRow[byteOffset + 1], outRow[byteOffset + 2]));
 138       if (alpha < maxColorChannel) {
 139         outRow[byteOffset + 3] = maxColorChannel;
 140       } else {
 141         outRow[byteOffset + 3] = alpha;
 142       }
 143     } else {
 144       // No alpha channel, the image is opaque.
 145       outRow[byteOffset + 3] = 0xff;
 146     }
 147   }
 148 }
 149
 150 #ifdef USE_SSE2
 151 void convolve_vertically_avx2(const int16_t* filter, int filterLen,
 152                               uint8_t* const* srcRows, int width, uint8_t* out,
 153                               bool hasAlpha);
 154 void convolve_horizontally_sse2(const unsigned char* srcData,
 155                                 const SkConvolutionFilter1D& filter,
 156                                 unsigned char* outRow, bool hasAlpha);
 157 void convolve_vertically_sse2(const int16_t* filter, int filterLen,
 158                               uint8_t* const* srcRows, int width, uint8_t* out,
 159                               bool hasAlpha);
 160 #elif defined(USE_NEON)
 161 void convolve_horizontally_neon(const unsigned char* srcData,
 162                                 const SkConvolutionFilter1D& filter,
 163                                 unsigned char* outRow, bool hasAlpha);
 164 void convolve_vertically_neon(const int16_t* filter, int filterLen,
 165                               uint8_t* const* srcRows, int width, uint8_t* out,
 166                               bool hasAlpha);
 167 #endif
 168
 169 void convolve_horizontally(const unsigned char* srcData,
 170                            const SkConvolutionFilter1D& filter,
 171                            unsigned char* outRow, bool hasAlpha) {
 172 #ifdef USE_SSE2
 173   if (mozilla::supports_sse2()) {
 174     convolve_horizontally_sse2(srcData, filter, outRow, hasAlpha);
 175     return;
 176   }
 177 #elif defined(USE_NEON)
 178   if (mozilla::supports_neon()) {
 179     convolve_horizontally_neon(srcData, filter, outRow, hasAlpha);
 180     return;
 181   }
 182 #endif
 183   if (hasAlpha) {
 184     ConvolveHorizontally<true>(srcData, filter, outRow);
 185   } else {
 186     ConvolveHorizontally<false>(srcData, filter, outRow);
 187   }
 188 }
 189
 190 void convolve_vertically(
 191     const SkConvolutionFilter1D::ConvolutionFixed* filterValues,
 192     int filterLength, unsigned char* const* sourceDataRows, int pixelWidth,
 193     unsigned char* outRow, bool hasAlpha) {
 194 #ifdef USE_SSE2
 195   if (mozilla::supports_avx2()) {
 196     convolve_vertically_avx2(filterValues, filterLength, sourceDataRows,
 197                              pixelWidth, outRow, hasAlpha);
 198     return;
 199   }
 200   if (mozilla::supports_sse2()) {
 201     convolve_vertically_sse2(filterValues, filterLength, sourceDataRows,
 202                              pixelWidth, outRow, hasAlpha);
 203     return;
 204   }
 205 #elif defined(USE_NEON)
 206   if (mozilla::supports_neon()) {
 207     convolve_vertically_neon(filterValues, filterLength, sourceDataRows,
 208                              pixelWidth, outRow, hasAlpha);
 209     return;
 210   }
 211 #endif
 212   if (hasAlpha) {
 213     ConvolveVertically<true>(filterValues, filterLength, sourceDataRows,
 214                              pixelWidth, outRow);
 215   } else {
 216     ConvolveVertically<false>(filterValues, filterLength, sourceDataRows,
 217                               pixelWidth, outRow);
 218   }
 219 }
 220
 221 // Stores a list of rows in a circular buffer. The usage is you write into it
 222 // by calling AdvanceRow. It will keep track of which row in the buffer it
 223 // should use next, and the total number of rows added.
 224 class CircularRowBuffer {
 225  public:
 226   // The number of pixels in each row is given in |sourceRowPixelWidth|.
 227   // The maximum number of rows needed in the buffer is |maxYFilterSize|
 228   // (we only need to store enough rows for the biggest filter).
 229   //
 230   // We use the |firstInputRow| to compute the coordinates of all of the
 231   // following rows returned by Advance().
 232   CircularRowBuffer(int destRowPixelWidth, int maxYFilterSize,
 233                     int firstInputRow)
 234       : fRowByteWidth(destRowPixelWidth * 4),
 235         fNumRows(maxYFilterSize),
 236         fNextRow(0),
 237         fNextRowCoordinate(firstInputRow) {}
 238
 239   bool AllocBuffer() {
 240     return fBuffer.resize(fRowByteWidth * fNumRows) &&
 241            fRowAddresses.resize(fNumRows);
 242   }
 243
 244   // Moves to the next row in the buffer, returning a pointer to the beginning
 245   // of it.
 246   unsigned char* advanceRow() {
 247     unsigned char* row = &fBuffer[fNextRow * fRowByteWidth];
 248     fNextRowCoordinate++;
 249
 250     // Set the pointer to the next row to use, wrapping around if necessary.
 251     fNextRow++;
 252     if (fNextRow == fNumRows) {
 253       fNextRow = 0;
 254     }
 255     return row;
 256   }
 257
 258   // Returns a pointer to an "unrolled" array of rows. These rows will start
 259   // at the y coordinate placed into |*firstRowIndex| and will continue in
 260   // order for the maximum number of rows in this circular buffer.
 261   //
 262   // The |firstRowIndex_| may be negative. This means the circular buffer
 263   // starts before the top of the image (it hasn't been filled yet).
 264   unsigned char* const* GetRowAddresses(int* firstRowIndex) {
 265     // Example for a 4-element circular buffer holding coords 6-9.
 266     //   Row 0   Coord 8
 267     //   Row 1   Coord 9
 268     //   Row 2   Coord 6  <- fNextRow = 2, fNextRowCoordinate = 10.
 269     //   Row 3   Coord 7
 270     //
 271     // The "next" row is also the first (lowest) coordinate. This computation
 272     // may yield a negative value, but that's OK, the math will work out
 273     // since the user of this buffer will compute the offset relative
 274     // to the firstRowIndex and the negative rows will never be used.
 275     *firstRowIndex = fNextRowCoordinate - fNumRows;
 276
 277     int curRow = fNextRow;
 278     for (int i = 0; i < fNumRows; i++) {
 279       fRowAddresses[i] = &fBuffer[curRow * fRowByteWidth];
 280
 281       // Advance to the next row, wrapping if necessary.
 282       curRow++;
 283       if (curRow == fNumRows) {
 284         curRow = 0;
 285       }
 286     }
 287     return &fRowAddresses[0];
 288   }
 289
 290  private:
 291   // The buffer storing the rows. They are packed, each one fRowByteWidth.
 292   mozilla::Vector<unsigned char> fBuffer;
 293
 294   // Number of bytes per row in the |buffer|.
 295   int fRowByteWidth;
 296
 297   // The number of rows available in the buffer.
 298   int fNumRows;
 299
 300   // The next row index we should write into. This wraps around as the
 301   // circular buffer is used.
 302   int fNextRow;
 303
 304   // The y coordinate of the |fNextRow|. This is incremented each time a
 305   // new row is appended and does not wrap.
 306   int fNextRowCoordinate;
 307
 308   // Buffer used by GetRowAddresses().
 309   mozilla::Vector<unsigned char*> fRowAddresses;
 310 };
 311
 312 SkConvolutionFilter1D::SkConvolutionFilter1D() : fMaxFilter(0) {}
 313
 314 SkConvolutionFilter1D::~SkConvolutionFilter1D() = default;
 315
 316 bool SkConvolutionFilter1D::AddFilter(int filterOffset,
 317                                       const ConvolutionFixed* filterValues,
 318                                       int filterLength) {
 319   // It is common for leading/trailing filter values to be zeros. In such
 320   // cases it is beneficial to only store the central factors.
 321   // For a scaling to 1/4th in each dimension using a Lanczos-2 filter on
 322   // a 1080p image this optimization gives a ~10% speed improvement.
 323   int filterSize = filterLength;
 324   int firstNonZero = 0;
 325   while (firstNonZero < filterLength && filterValues[firstNonZero] == 0) {
 326     firstNonZero++;
 327   }
 328
 329   if (firstNonZero < filterLength) {
 330     // Here we have at least one non-zero factor.
 331     int lastNonZero = filterLength - 1;
 332     while (lastNonZero >= 0 && filterValues[lastNonZero] == 0) {
 333       lastNonZero--;
 334     }
 335
 336     filterOffset += firstNonZero;
 337     filterLength = lastNonZero + 1 - firstNonZero;
 338     MOZ_ASSERT(filterLength > 0);
 339
 340     if (!fFilterValues.append(&filterValues[firstNonZero], filterLength)) {
 341       return false;
 342     }
 343   } else {
 344     // Here all the factors were zeroes.
 345     filterLength = 0;
 346   }
 347
 348   FilterInstance instance = {
 349       // We pushed filterLength elements onto fFilterValues
 350       int(fFilterValues.length()) - filterLength, filterOffset, filterLength,
 351       filterSize};
 352   if (!fFilters.append(instance)) {
 353     if (filterLength > 0) {
 354       fFilterValues.shrinkBy(filterLength);
 355     }
 356     return false;
 357   }
 358
 359   fMaxFilter = std::max(fMaxFilter, filterLength);
 360   return true;
 361 }
 362
 363 bool SkConvolutionFilter1D::ComputeFilterValues(
 364     const SkBitmapFilter& aBitmapFilter, int32_t aSrcSize, int32_t aDstSize) {
 365   // When we're doing a magnification, the scale will be larger than one. This
 366   // means the destination pixels are much smaller than the source pixels, and
 367   // that the range covered by the filter won't necessarily cover any source
 368   // pixel boundaries. Therefore, we use these clamped values (max of 1) for
 369   // some computations.
 370   float scale = float(aDstSize) / float(aSrcSize);
 371   float clampedScale = std::min(1.0f, scale);
 372   // This is how many source pixels from the center we need to count
 373   // to support the filtering function.
 374   float srcSupport = aBitmapFilter.width() / clampedScale;
 375   float invScale = 1.0f / scale;
 376
 377   mozilla::Vector<float, 64> filterValues;
 378   mozilla::Vector<ConvolutionFixed, 64> fixedFilterValues;
 379
 380   // Loop over all pixels in the output range. We will generate one set of
 381   // filter values for each one. Those values will tell us how to blend the
 382   // source pixels to compute the destination pixel.
 383
 384   // This value is computed based on how SkTDArray::resizeStorageToAtLeast works
 385   // in order to ensure that it does not overflow or assert. That functions
 386   // computes
 387   //   n+4 + (n+4)/4
 388   // and we want to to fit in a 32 bit signed int. Equating that to 2^31-1 and
 389   // solving n gives n = (2^31-6)*4/5 = 1717986913.6
 390   const int32_t maxToPassToReserveAdditional = 1717986913;
 391
 392   int32_t filterValueCount = int32_t(ceilf(aDstSize * srcSupport * 2));
 393   if (aDstSize > maxToPassToReserveAdditional || filterValueCount < 0 ||
 394       filterValueCount > maxToPassToReserveAdditional ||
 395       !reserveAdditional(aDstSize, filterValueCount)) {
 396     return false;
 397   }
 398   size_t oldFiltersLength = fFilters.length();
 399   size_t oldFilterValuesLength = fFilterValues.length();
 400   int oldMaxFilter = fMaxFilter;
 401   for (int32_t destI = 0; destI < aDstSize; destI++) {
 402     // This is the pixel in the source directly under the pixel in the dest.
 403     // Note that we base computations on the "center" of the pixels. To see
 404     // why, observe that the destination pixel at coordinates (0, 0) in a 5.0x
 405     // downscale should "cover" the pixels around the pixel with *its center*
 406     // at coordinates (2.5, 2.5) in the source, not those around (0, 0).
 407     // Hence we need to scale coordinates (0.5, 0.5), not (0, 0).
 408     float srcPixel = (static_cast<float>(destI) + 0.5f) * invScale;
 409
 410     // Compute the (inclusive) range of source pixels the filter covers.
 411     float srcBegin = std::max(0.0f, floorf(srcPixel - srcSupport));
 412     float srcEnd = std::min(aSrcSize - 1.0f, ceilf(srcPixel + srcSupport));
 413
 414     // Compute the unnormalized filter value at each location of the source
 415     // it covers.
 416
 417     // Sum of the filter values for normalizing.
 418     // Distance from the center of the filter, this is the filter coordinate
 419     // in source space. We also need to consider the center of the pixel
 420     // when comparing distance against 'srcPixel'. In the 5x downscale
 421     // example used above the distance from the center of the filter to
 422     // the pixel with coordinates (2, 2) should be 0, because its center
 423     // is at (2.5, 2.5).
 424     int32_t filterCount = int32_t(srcEnd - srcBegin) + 1;
 425     if (filterCount <= 0 || !filterValues.resize(filterCount) ||
 426         !fixedFilterValues.resize(filterCount)) {
 427       return false;
 428     }
 429
 430     float destFilterDist = (srcBegin + 0.5f - srcPixel) * clampedScale;
 431     float filterSum = 0.0f;
 432     for (int32_t index = 0; index < filterCount; index++) {
 433       float filterValue = aBitmapFilter.evaluate(destFilterDist);
 434       filterValues[index] = filterValue;
 435       filterSum += filterValue;
 436       destFilterDist += clampedScale;
 437     }
 438
 439     // The filter must be normalized so that we don't affect the brightness of
 440     // the image. Convert to normalized fixed point.
 441     ConvolutionFixed fixedSum = 0;
 442     float invFilterSum = 1.0f / filterSum;
 443     for (int32_t fixedI = 0; fixedI < filterCount; fixedI++) {
 444       ConvolutionFixed curFixed = ToFixed(filterValues[fixedI] * invFilterSum);
 445       fixedSum += curFixed;
 446       fixedFilterValues[fixedI] = curFixed;
 447     }
 448
 449     // The conversion to fixed point will leave some rounding errors, which
 450     // we add back in to avoid affecting the brightness of the image. We
 451     // arbitrarily add this to the center of the filter array (this won't always
 452     // be the center of the filter function since it could get clipped on the
 453     // edges, but it doesn't matter enough to worry about that case).
 454     ConvolutionFixed leftovers = ToFixed(1) - fixedSum;
 455     fixedFilterValues[filterCount / 2] += leftovers;
 456
 457     if (!AddFilter(int32_t(srcBegin), fixedFilterValues.begin(), filterCount)) {
 458       fFilters.shrinkTo(oldFiltersLength);
 459       fFilterValues.shrinkTo(oldFilterValuesLength);
 460       fMaxFilter = oldMaxFilter;
 461       return false;
 462     }
 463   }
 464
 465   return maxFilter() > 0 && numValues() == aDstSize;
 466 }
 467
 468 // Does a two-dimensional convolution on the given source image.
 469 //
 470 // It is assumed the source pixel offsets referenced in the input filters
 471 // reference only valid pixels, so the source image size is not required. Each
 472 // row of the source image starts |sourceByteRowStride| after the previous
 473 // one (this allows you to have rows with some padding at the end).
 474 //
 475 // The result will be put into the given output buffer. The destination image
 476 // size will be xfilter.numValues() * yfilter.numValues() pixels. It will be
 477 // in rows of exactly xfilter.numValues() * 4 bytes.
 478 //
 479 // |sourceHasAlpha| is a hint that allows us to avoid doing computations on
 480 // the alpha channel if the image is opaque. If you don't know, set this to
 481 // true and it will work properly, but setting this to false will be a few
 482 // percent faster if you know the image is opaque.
 483 //
 484 // The layout in memory is assumed to be 4-bytes per pixel in B-G-R-A order
 485 // (this is ARGB when loaded into 32-bit words on a little-endian machine).
 486 /**
 487  *  Returns false if it was unable to perform the convolution/rescale. in which
 488  * case the output buffer is assumed to be undefined.
 489  */
 490 bool BGRAConvolve2D(const unsigned char* sourceData, int sourceByteRowStride,
 491                     bool sourceHasAlpha, const SkConvolutionFilter1D& filterX,
 492                     const SkConvolutionFilter1D& filterY,
 493                     int outputByteRowStride, unsigned char* output) {
 494   int maxYFilterSize = filterY.maxFilter();
 495
 496   // The next row in the input that we will generate a horizontally
 497   // convolved row for. If the filter doesn't start at the beginning of the
 498   // image (this is the case when we are only resizing a subset), then we
 499   // don't want to generate any output rows before that. Compute the starting
 500   // row for convolution as the first pixel for the first vertical filter.
 501   int filterOffset = 0, filterLength = 0;
 502   const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
 503       filterY.FilterForValue(0, &filterOffset, &filterLength);
 504   int nextXRow = filterOffset;
 505
 506   // We loop over each row in the input doing a horizontal convolution. This
 507   // will result in a horizontally convolved image. We write the results into
 508   // a circular buffer of convolved rows and do vertical convolution as rows
 509   // are available. This prevents us from having to store the entire
 510   // intermediate image and helps cache coherency.
 511   // We will need four extra rows to allow horizontal convolution could be done
 512   // simultaneously. We also pad each row in row buffer to be aligned-up to
 513   // 32 bytes.
 514   // TODO(jiesun): We do not use aligned load from row buffer in vertical
 515   // convolution pass yet. Somehow Windows does not like it.
 516   int rowBufferWidth = (filterX.numValues() + 31) & ~0x1F;
 517   int rowBufferHeight = maxYFilterSize;
 518
 519   // check for too-big allocation requests : crbug.com/528628
 520   {
 521     int64_t size = int64_t(rowBufferWidth) * int64_t(rowBufferHeight);
 522     // need some limit, to avoid over-committing success from malloc, but then
 523     // crashing when we try to actually use the memory.
 524     // 100meg seems big enough to allow "normal" zoom factors and image sizes
 525     // through while avoiding the crash seen by the bug (crbug.com/528628)
 526     if (size > 100 * 1024 * 1024) {
 527       //            printf_stderr("BGRAConvolve2D: tmp allocation [%lld] too
 528       //            big\n", size);
 529       return false;
 530     }
 531   }
 532
 533   CircularRowBuffer rowBuffer(rowBufferWidth, rowBufferHeight, filterOffset);
 534   if (!rowBuffer.AllocBuffer()) {
 535     return false;
 536   }
 537
 538   // Loop over every possible output row, processing just enough horizontal
 539   // convolutions to run each subsequent vertical convolution.
 540   MOZ_ASSERT(outputByteRowStride >= filterX.numValues() * 4);
 541   int numOutputRows = filterY.numValues();
 542
 543   // We need to check which is the last line to convolve before we advance 4
 544   // lines in one iteration.
 545   int lastFilterOffset, lastFilterLength;
 546   filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset,
 547                          &lastFilterLength);
 548
 549   for (int outY = 0; outY < numOutputRows; outY++) {
 550     filterValues = filterY.FilterForValue(outY, &filterOffset, &filterLength);
 551
 552     // Generate output rows until we have enough to run the current filter.
 553     while (nextXRow < filterOffset + filterLength) {
 554       convolve_horizontally(
 555           &sourceData[(uint64_t)nextXRow * sourceByteRowStride], filterX,
 556           rowBuffer.advanceRow(), sourceHasAlpha);
 557       nextXRow++;
 558     }
 559
 560     // Compute where in the output image this row of final data will go.
 561     unsigned char* curOutputRow = &output[(uint64_t)outY * outputByteRowStride];
 562
 563     // Get the list of rows that the circular buffer has, in order.
 564     int firstRowInCircularBuffer;
 565     unsigned char* const* rowsToConvolve =
 566         rowBuffer.GetRowAddresses(&firstRowInCircularBuffer);
 567
 568     // Now compute the start of the subset of those rows that the filter needs.
 569     unsigned char* const* firstRowForFilter =
 570         &rowsToConvolve[filterOffset - firstRowInCircularBuffer];
 571
 572     convolve_vertically(filterValues, filterLength, firstRowForFilter,
 573                         filterX.numValues(), curOutputRow, sourceHasAlpha);
 574   }
 575   return true;
 576 }
 577
 578 }  // namespace skia