gfx/2d/SkConvolver.cpp

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
   3 // Copyright (c) 2011-2016 Google Inc.
   4 // Use of this source code is governed by a BSD-style license that can be
   5 // found in the gfx/skia/LICENSE file.
   6
   7 #include "SkConvolver.h"
   8 #include "mozilla/Vector.h"
   9
  10 #ifdef USE_SSE2
  11 #  include "mozilla/SSE.h"
  12 #endif
  13
  14 #ifdef USE_NEON
  15 #  include "mozilla/arm.h"
  16 #endif
  17
  18 namespace skia {
  19
  20 // Converts the argument to an 8-bit unsigned value by clamping to the range
  21 // 0-255.
  22 static inline unsigned char ClampTo8(int a) {
  23   if (static_cast<unsigned>(a) < 256) {
  24     return a;  // Avoid the extra check in the common case.
  25   }
  26   if (a < 0) {
  27     return 0;
  28   }
  29   return 255;
  30 }
  31
  32 // Convolves horizontally along a single row. The row data is given in
  33 // |srcData| and continues for the numValues() of the filter.
  34 template <bool hasAlpha>
  35 void ConvolveHorizontally(const unsigned char* srcData,
  36                           const SkConvolutionFilter1D& filter,
  37                           unsigned char* outRow) {
  38   // Loop over each pixel on this row in the output image.
  39   int numValues = filter.numValues();
  40   for (int outX = 0; outX < numValues; outX++) {
  41     // Get the filter that determines the current output pixel.
  42     int filterOffset, filterLength;
  43     const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
  44         filter.FilterForValue(outX, &filterOffset, &filterLength);
  45
  46     // Compute the first pixel in this row that the filter affects. It will
  47     // touch |filterLength| pixels (4 bytes each) after this.
  48     const unsigned char* rowToFilter = &srcData[filterOffset * 4];
  49
  50     // Apply the filter to the row to get the destination pixel in |accum|.
  51     int accum[4] = {0};
  52     for (int filterX = 0; filterX < filterLength; filterX++) {
  53       SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterX];
  54       accum[0] += curFilter * rowToFilter[filterX * 4 + 0];
  55       accum[1] += curFilter * rowToFilter[filterX * 4 + 1];
  56       accum[2] += curFilter * rowToFilter[filterX * 4 + 2];
  57       if (hasAlpha) {
  58         accum[3] += curFilter * rowToFilter[filterX * 4 + 3];
  59       }
  60     }
  61
  62     // Bring this value back in range. All of the filter scaling factors
  63     // are in fixed point with kShiftBits bits of fractional part.
  64     accum[0] >>= SkConvolutionFilter1D::kShiftBits;
  65     accum[1] >>= SkConvolutionFilter1D::kShiftBits;
  66     accum[2] >>= SkConvolutionFilter1D::kShiftBits;
  67
  68     if (hasAlpha) {
  69       accum[3] >>= SkConvolutionFilter1D::kShiftBits;
  70     }
  71
  72     // Store the new pixel.
  73     outRow[outX * 4 + 0] = ClampTo8(accum[0]);
  74     outRow[outX * 4 + 1] = ClampTo8(accum[1]);
  75     outRow[outX * 4 + 2] = ClampTo8(accum[2]);
  76     if (hasAlpha) {
  77       outRow[outX * 4 + 3] = ClampTo8(accum[3]);
  78     }
  79   }
  80 }
  81
  82 // Does vertical convolution to produce one output row. The filter values and
  83 // length are given in the first two parameters. These are applied to each
  84 // of the rows pointed to in the |sourceDataRows| array, with each row
  85 // being |pixelWidth| wide.
  86 //
  87 // The output must have room for |pixelWidth * 4| bytes.
  88 template <bool hasAlpha>
  89 void ConvolveVertically(
  90     const SkConvolutionFilter1D::ConvolutionFixed* filterValues,
  91     int filterLength, unsigned char* const* sourceDataRows, int pixelWidth,
  92     unsigned char* outRow) {
  93   // We go through each column in the output and do a vertical convolution,
  94   // generating one output pixel each time.
  95   for (int outX = 0; outX < pixelWidth; outX++) {
  96     // Compute the number of bytes over in each row that the current column
  97     // we're convolving starts at. The pixel will cover the next 4 bytes.
  98     int byteOffset = outX * 4;
  99
 100     // Apply the filter to one column of pixels.
 101     int accum[4] = {0};
 102     for (int filterY = 0; filterY < filterLength; filterY++) {
 103       SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterY];
 104       accum[0] += curFilter * sourceDataRows[filterY][byteOffset + 0];
 105       accum[1] += curFilter * sourceDataRows[filterY][byteOffset + 1];
 106       accum[2] += curFilter * sourceDataRows[filterY][byteOffset + 2];
 107       if (hasAlpha) {
 108         accum[3] += curFilter * sourceDataRows[filterY][byteOffset + 3];
 109       }
 110     }
 111
 112     // Bring this value back in range. All of the filter scaling factors
 113     // are in fixed point with kShiftBits bits of precision.
 114     accum[0] >>= SkConvolutionFilter1D::kShiftBits;
 115     accum[1] >>= SkConvolutionFilter1D::kShiftBits;
 116     accum[2] >>= SkConvolutionFilter1D::kShiftBits;
 117     if (hasAlpha) {
 118       accum[3] >>= SkConvolutionFilter1D::kShiftBits;
 119     }
 120
 121     // Store the new pixel.
 122     outRow[byteOffset + 0] = ClampTo8(accum[0]);
 123     outRow[byteOffset + 1] = ClampTo8(accum[1]);
 124     outRow[byteOffset + 2] = ClampTo8(accum[2]);
 125
 126     if (hasAlpha) {
 127       unsigned char alpha = ClampTo8(accum[3]);
 128
 129       // Make sure the alpha channel doesn't come out smaller than any of the
 130       // color channels. We use premultipled alpha channels, so this should
 131       // never happen, but rounding errors will cause this from time to time.
 132       // These "impossible" colors will cause overflows (and hence random pixel
 133       // values) when the resulting bitmap is drawn to the screen.
 134       //
 135       // We only need to do this when generating the final output row (here).
 136       int maxColorChannel =
 137           std::max(outRow[byteOffset + 0],
 138                    std::max(outRow[byteOffset + 1], outRow[byteOffset + 2]));
 139       if (alpha < maxColorChannel) {
 140         outRow[byteOffset + 3] = maxColorChannel;
 141       } else {
 142         outRow[byteOffset + 3] = alpha;
 143       }
 144     } else {
 145       // No alpha channel, the image is opaque.
 146       outRow[byteOffset + 3] = 0xff;
 147     }
 148   }
 149 }
 150
 151 #ifdef USE_SSE2
 152 void convolve_vertically_avx2(const int16_t* filter, int filterLen,
 153                               uint8_t* const* srcRows, int width, uint8_t* out,
 154                               bool hasAlpha);
 155 void convolve_horizontally_sse2(const unsigned char* srcData,
 156                                 const SkConvolutionFilter1D& filter,
 157                                 unsigned char* outRow, bool hasAlpha);
 158 void convolve_vertically_sse2(const int16_t* filter, int filterLen,
 159                               uint8_t* const* srcRows, int width, uint8_t* out,
 160                               bool hasAlpha);
 161 #elif defined(USE_NEON)
 162 void convolve_horizontally_neon(const unsigned char* srcData,
 163                                 const SkConvolutionFilter1D& filter,
 164                                 unsigned char* outRow, bool hasAlpha);
 165 void convolve_vertically_neon(const int16_t* filter, int filterLen,
 166                               uint8_t* const* srcRows, int width, uint8_t* out,
 167                               bool hasAlpha);
 168 #endif
 169
 170 void convolve_horizontally(const unsigned char* srcData,
 171                            const SkConvolutionFilter1D& filter,
 172                            unsigned char* outRow, bool hasAlpha) {
 173 #ifdef USE_SSE2
 174   if (mozilla::supports_sse2()) {
 175     convolve_horizontally_sse2(srcData, filter, outRow, hasAlpha);
 176     return;
 177   }
 178 #elif defined(USE_NEON)
 179   if (mozilla::supports_neon()) {
 180     convolve_horizontally_neon(srcData, filter, outRow, hasAlpha);
 181     return;
 182   }
 183 #endif
 184   if (hasAlpha) {
 185     ConvolveHorizontally<true>(srcData, filter, outRow);
 186   } else {
 187     ConvolveHorizontally<false>(srcData, filter, outRow);
 188   }
 189 }
 190
 191 void convolve_vertically(
 192     const SkConvolutionFilter1D::ConvolutionFixed* filterValues,
 193     int filterLength, unsigned char* const* sourceDataRows, int pixelWidth,
 194     unsigned char* outRow, bool hasAlpha) {
 195 #ifdef USE_SSE2
 196   if (mozilla::supports_avx2()) {
 197     convolve_vertically_avx2(filterValues, filterLength, sourceDataRows,
 198                              pixelWidth, outRow, hasAlpha);
 199     return;
 200   }
 201   if (mozilla::supports_sse2()) {
 202     convolve_vertically_sse2(filterValues, filterLength, sourceDataRows,
 203                              pixelWidth, outRow, hasAlpha);
 204     return;
 205   }
 206 #elif defined(USE_NEON)
 207   if (mozilla::supports_neon()) {
 208     convolve_vertically_neon(filterValues, filterLength, sourceDataRows,
 209                              pixelWidth, outRow, hasAlpha);
 210     return;
 211   }
 212 #endif
 213   if (hasAlpha) {
 214     ConvolveVertically<true>(filterValues, filterLength, sourceDataRows,
 215                              pixelWidth, outRow);
 216   } else {
 217     ConvolveVertically<false>(filterValues, filterLength, sourceDataRows,
 218                               pixelWidth, outRow);
 219   }
 220 }
 221
 222 // Stores a list of rows in a circular buffer. The usage is you write into it
 223 // by calling AdvanceRow. It will keep track of which row in the buffer it
 224 // should use next, and the total number of rows added.
 225 class CircularRowBuffer {
 226  public:
 227   // The number of pixels in each row is given in |sourceRowPixelWidth|.
 228   // The maximum number of rows needed in the buffer is |maxYFilterSize|
 229   // (we only need to store enough rows for the biggest filter).
 230   //
 231   // We use the |firstInputRow| to compute the coordinates of all of the
 232   // following rows returned by Advance().
 233   CircularRowBuffer(int destRowPixelWidth, int maxYFilterSize,
 234                     int firstInputRow)
 235       : fRowByteWidth(destRowPixelWidth * 4),
 236         fNumRows(maxYFilterSize),
 237         fNextRow(0),
 238         fNextRowCoordinate(firstInputRow) {
 239     fBuffer.resize(fRowByteWidth * maxYFilterSize);
 240     fRowAddresses.resize(fNumRows);
 241   }
 242
 243   // Moves to the next row in the buffer, returning a pointer to the beginning
 244   // of it.
 245   unsigned char* advanceRow() {
 246     unsigned char* row = &fBuffer[fNextRow * fRowByteWidth];
 247     fNextRowCoordinate++;
 248
 249     // Set the pointer to the next row to use, wrapping around if necessary.
 250     fNextRow++;
 251     if (fNextRow == fNumRows) {
 252       fNextRow = 0;
 253     }
 254     return row;
 255   }
 256
 257   // Returns a pointer to an "unrolled" array of rows. These rows will start
 258   // at the y coordinate placed into |*firstRowIndex| and will continue in
 259   // order for the maximum number of rows in this circular buffer.
 260   //
 261   // The |firstRowIndex_| may be negative. This means the circular buffer
 262   // starts before the top of the image (it hasn't been filled yet).
 263   unsigned char* const* GetRowAddresses(int* firstRowIndex) {
 264     // Example for a 4-element circular buffer holding coords 6-9.
 265     //   Row 0   Coord 8
 266     //   Row 1   Coord 9
 267     //   Row 2   Coord 6  <- fNextRow = 2, fNextRowCoordinate = 10.
 268     //   Row 3   Coord 7
 269     //
 270     // The "next" row is also the first (lowest) coordinate. This computation
 271     // may yield a negative value, but that's OK, the math will work out
 272     // since the user of this buffer will compute the offset relative
 273     // to the firstRowIndex and the negative rows will never be used.
 274     *firstRowIndex = fNextRowCoordinate - fNumRows;
 275
 276     int curRow = fNextRow;
 277     for (int i = 0; i < fNumRows; i++) {
 278       fRowAddresses[i] = &fBuffer[curRow * fRowByteWidth];
 279
 280       // Advance to the next row, wrapping if necessary.
 281       curRow++;
 282       if (curRow == fNumRows) {
 283         curRow = 0;
 284       }
 285     }
 286     return &fRowAddresses[0];
 287   }
 288
 289  private:
 290   // The buffer storing the rows. They are packed, each one fRowByteWidth.
 291   std::vector<unsigned char> fBuffer;
 292
 293   // Number of bytes per row in the |buffer|.
 294   int fRowByteWidth;
 295
 296   // The number of rows available in the buffer.
 297   int fNumRows;
 298
 299   // The next row index we should write into. This wraps around as the
 300   // circular buffer is used.
 301   int fNextRow;
 302
 303   // The y coordinate of the |fNextRow|. This is incremented each time a
 304   // new row is appended and does not wrap.
 305   int fNextRowCoordinate;
 306
 307   // Buffer used by GetRowAddresses().
 308   std::vector<unsigned char*> fRowAddresses;
 309 };
 310
 311 SkConvolutionFilter1D::SkConvolutionFilter1D() : fMaxFilter(0) {}
 312
 313 SkConvolutionFilter1D::~SkConvolutionFilter1D() = default;
 314
 315 void SkConvolutionFilter1D::AddFilter(int filterOffset,
 316                                       const ConvolutionFixed* filterValues,
 317                                       int filterLength) {
 318   // It is common for leading/trailing filter values to be zeros. In such
 319   // cases it is beneficial to only store the central factors.
 320   // For a scaling to 1/4th in each dimension using a Lanczos-2 filter on
 321   // a 1080p image this optimization gives a ~10% speed improvement.
 322   int filterSize = filterLength;
 323   int firstNonZero = 0;
 324   while (firstNonZero < filterLength && filterValues[firstNonZero] == 0) {
 325     firstNonZero++;
 326   }
 327
 328   if (firstNonZero < filterLength) {
 329     // Here we have at least one non-zero factor.
 330     int lastNonZero = filterLength - 1;
 331     while (lastNonZero >= 0 && filterValues[lastNonZero] == 0) {
 332       lastNonZero--;
 333     }
 334
 335     filterOffset += firstNonZero;
 336     filterLength = lastNonZero + 1 - firstNonZero;
 337     MOZ_ASSERT(filterLength > 0);
 338
 339     fFilterValues.insert(fFilterValues.end(), &filterValues[firstNonZero],
 340                          &filterValues[lastNonZero + 1]);
 341   } else {
 342     // Here all the factors were zeroes.
 343     filterLength = 0;
 344   }
 345
 346   FilterInstance instance = {
 347       // We pushed filterLength elements onto fFilterValues
 348       int(fFilterValues.size()) - filterLength, filterOffset, filterLength,
 349       filterSize};
 350   fFilters.push_back(instance);
 351
 352   fMaxFilter = std::max(fMaxFilter, filterLength);
 353 }
 354
 355 bool SkConvolutionFilter1D::ComputeFilterValues(
 356     const SkBitmapFilter& aBitmapFilter, int32_t aSrcSize, int32_t aDstSize) {
 357   // When we're doing a magnification, the scale will be larger than one. This
 358   // means the destination pixels are much smaller than the source pixels, and
 359   // that the range covered by the filter won't necessarily cover any source
 360   // pixel boundaries. Therefore, we use these clamped values (max of 1) for
 361   // some computations.
 362   float scale = float(aDstSize) / float(aSrcSize);
 363   float clampedScale = std::min(1.0f, scale);
 364   // This is how many source pixels from the center we need to count
 365   // to support the filtering function.
 366   float srcSupport = aBitmapFilter.width() / clampedScale;
 367   float invScale = 1.0f / scale;
 368
 369   mozilla::Vector<float, 64> filterValues;
 370   mozilla::Vector<ConvolutionFixed, 64> fixedFilterValues;
 371
 372   // Loop over all pixels in the output range. We will generate one set of
 373   // filter values for each one. Those values will tell us how to blend the
 374   // source pixels to compute the destination pixel.
 375
 376   // This value is computed based on how SkTDArray::resizeStorageToAtLeast works
 377   // in order to ensure that it does not overflow or assert. That functions
 378   // computes
 379   //   n+4 + (n+4)/4
 380   // and we want to to fit in a 32 bit signed int. Equating that to 2^31-1 and
 381   // solving n gives n = (2^31-6)*4/5 = 1717986913.6
 382   const int32_t maxToPassToReserveAdditional = 1717986913;
 383
 384   int32_t filterValueCount = int32_t(ceilf(aDstSize * srcSupport * 2));
 385   if (aDstSize > maxToPassToReserveAdditional || filterValueCount < 0 ||
 386       filterValueCount > maxToPassToReserveAdditional) {
 387     return false;
 388   }
 389   reserveAdditional(aDstSize, filterValueCount);
 390   for (int32_t destI = 0; destI < aDstSize; destI++) {
 391     // This is the pixel in the source directly under the pixel in the dest.
 392     // Note that we base computations on the "center" of the pixels. To see
 393     // why, observe that the destination pixel at coordinates (0, 0) in a 5.0x
 394     // downscale should "cover" the pixels around the pixel with *its center*
 395     // at coordinates (2.5, 2.5) in the source, not those around (0, 0).
 396     // Hence we need to scale coordinates (0.5, 0.5), not (0, 0).
 397     float srcPixel = (static_cast<float>(destI) + 0.5f) * invScale;
 398
 399     // Compute the (inclusive) range of source pixels the filter covers.
 400     float srcBegin = std::max(0.0f, floorf(srcPixel - srcSupport));
 401     float srcEnd = std::min(aSrcSize - 1.0f, ceilf(srcPixel + srcSupport));
 402
 403     // Compute the unnormalized filter value at each location of the source
 404     // it covers.
 405
 406     // Sum of the filter values for normalizing.
 407     // Distance from the center of the filter, this is the filter coordinate
 408     // in source space. We also need to consider the center of the pixel
 409     // when comparing distance against 'srcPixel'. In the 5x downscale
 410     // example used above the distance from the center of the filter to
 411     // the pixel with coordinates (2, 2) should be 0, because its center
 412     // is at (2.5, 2.5).
 413     int32_t filterCount = int32_t(srcEnd - srcBegin) + 1;
 414     if (filterCount <= 0 || !filterValues.resize(filterCount) ||
 415         !fixedFilterValues.resize(filterCount)) {
 416       return false;
 417     }
 418
 419     float destFilterDist = (srcBegin + 0.5f - srcPixel) * clampedScale;
 420     float filterSum = 0.0f;
 421     for (int32_t index = 0; index < filterCount; index++) {
 422       float filterValue = aBitmapFilter.evaluate(destFilterDist);
 423       filterValues[index] = filterValue;
 424       filterSum += filterValue;
 425       destFilterDist += clampedScale;
 426     }
 427
 428     // The filter must be normalized so that we don't affect the brightness of
 429     // the image. Convert to normalized fixed point.
 430     ConvolutionFixed fixedSum = 0;
 431     float invFilterSum = 1.0f / filterSum;
 432     for (int32_t fixedI = 0; fixedI < filterCount; fixedI++) {
 433       ConvolutionFixed curFixed = ToFixed(filterValues[fixedI] * invFilterSum);
 434       fixedSum += curFixed;
 435       fixedFilterValues[fixedI] = curFixed;
 436     }
 437
 438     // The conversion to fixed point will leave some rounding errors, which
 439     // we add back in to avoid affecting the brightness of the image. We
 440     // arbitrarily add this to the center of the filter array (this won't always
 441     // be the center of the filter function since it could get clipped on the
 442     // edges, but it doesn't matter enough to worry about that case).
 443     ConvolutionFixed leftovers = ToFixed(1) - fixedSum;
 444     fixedFilterValues[filterCount / 2] += leftovers;
 445
 446     AddFilter(int32_t(srcBegin), fixedFilterValues.begin(), filterCount);
 447   }
 448
 449   return maxFilter() > 0 && numValues() == aDstSize;
 450 }
 451
 452 // Does a two-dimensional convolution on the given source image.
 453 //
 454 // It is assumed the source pixel offsets referenced in the input filters
 455 // reference only valid pixels, so the source image size is not required. Each
 456 // row of the source image starts |sourceByteRowStride| after the previous
 457 // one (this allows you to have rows with some padding at the end).
 458 //
 459 // The result will be put into the given output buffer. The destination image
 460 // size will be xfilter.numValues() * yfilter.numValues() pixels. It will be
 461 // in rows of exactly xfilter.numValues() * 4 bytes.
 462 //
 463 // |sourceHasAlpha| is a hint that allows us to avoid doing computations on
 464 // the alpha channel if the image is opaque. If you don't know, set this to
 465 // true and it will work properly, but setting this to false will be a few
 466 // percent faster if you know the image is opaque.
 467 //
 468 // The layout in memory is assumed to be 4-bytes per pixel in B-G-R-A order
 469 // (this is ARGB when loaded into 32-bit words on a little-endian machine).
 470 /**
 471  *  Returns false if it was unable to perform the convolution/rescale. in which
 472  * case the output buffer is assumed to be undefined.
 473  */
 474 bool BGRAConvolve2D(const unsigned char* sourceData, int sourceByteRowStride,
 475                     bool sourceHasAlpha, const SkConvolutionFilter1D& filterX,
 476                     const SkConvolutionFilter1D& filterY,
 477                     int outputByteRowStride, unsigned char* output) {
 478   int maxYFilterSize = filterY.maxFilter();
 479
 480   // The next row in the input that we will generate a horizontally
 481   // convolved row for. If the filter doesn't start at the beginning of the
 482   // image (this is the case when we are only resizing a subset), then we
 483   // don't want to generate any output rows before that. Compute the starting
 484   // row for convolution as the first pixel for the first vertical filter.
 485   int filterOffset = 0, filterLength = 0;
 486   const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
 487       filterY.FilterForValue(0, &filterOffset, &filterLength);
 488   int nextXRow = filterOffset;
 489
 490   // We loop over each row in the input doing a horizontal convolution. This
 491   // will result in a horizontally convolved image. We write the results into
 492   // a circular buffer of convolved rows and do vertical convolution as rows
 493   // are available. This prevents us from having to store the entire
 494   // intermediate image and helps cache coherency.
 495   // We will need four extra rows to allow horizontal convolution could be done
 496   // simultaneously. We also pad each row in row buffer to be aligned-up to
 497   // 32 bytes.
 498   // TODO(jiesun): We do not use aligned load from row buffer in vertical
 499   // convolution pass yet. Somehow Windows does not like it.
 500   int rowBufferWidth = (filterX.numValues() + 31) & ~0x1F;
 501   int rowBufferHeight = maxYFilterSize;
 502
 503   // check for too-big allocation requests : crbug.com/528628
 504   {
 505     int64_t size = int64_t(rowBufferWidth) * int64_t(rowBufferHeight);
 506     // need some limit, to avoid over-committing success from malloc, but then
 507     // crashing when we try to actually use the memory.
 508     // 100meg seems big enough to allow "normal" zoom factors and image sizes
 509     // through while avoiding the crash seen by the bug (crbug.com/528628)
 510     if (size > 100 * 1024 * 1024) {
 511       //            printf_stderr("BGRAConvolve2D: tmp allocation [%lld] too
 512       //            big\n", size);
 513       return false;
 514     }
 515   }
 516
 517   CircularRowBuffer rowBuffer(rowBufferWidth, rowBufferHeight, filterOffset);
 518
 519   // Loop over every possible output row, processing just enough horizontal
 520   // convolutions to run each subsequent vertical convolution.
 521   MOZ_ASSERT(outputByteRowStride >= filterX.numValues() * 4);
 522   int numOutputRows = filterY.numValues();
 523
 524   // We need to check which is the last line to convolve before we advance 4
 525   // lines in one iteration.
 526   int lastFilterOffset, lastFilterLength;
 527   filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset,
 528                          &lastFilterLength);
 529
 530   for (int outY = 0; outY < numOutputRows; outY++) {
 531     filterValues = filterY.FilterForValue(outY, &filterOffset, &filterLength);
 532
 533     // Generate output rows until we have enough to run the current filter.
 534     while (nextXRow < filterOffset + filterLength) {
 535       convolve_horizontally(
 536           &sourceData[(uint64_t)nextXRow * sourceByteRowStride], filterX,
 537           rowBuffer.advanceRow(), sourceHasAlpha);
 538       nextXRow++;
 539     }
 540
 541     // Compute where in the output image this row of final data will go.
 542     unsigned char* curOutputRow = &output[(uint64_t)outY * outputByteRowStride];
 543
 544     // Get the list of rows that the circular buffer has, in order.
 545     int firstRowInCircularBuffer;
 546     unsigned char* const* rowsToConvolve =
 547         rowBuffer.GetRowAddresses(&firstRowInCircularBuffer);
 548
 549     // Now compute the start of the subset of those rows that the filter needs.
 550     unsigned char* const* firstRowForFilter =
 551         &rowsToConvolve[filterOffset - firstRowInCircularBuffer];
 552
 553     convolve_vertically(filterValues, filterLength, firstRowForFilter,
 554                         filterX.numValues(), curOutputRow, sourceHasAlpha);
 555   }
 556   return true;
 557 }
 558
 559 }  // namespace skia