no bug - Bumping Firefox l10n changesets r=release a=l10n-bump DONTBUILD CLOSED TREE
[gecko.git] / gfx / 2d / SkConvolver.cpp
blobbefe8da30b9027df6ca3750802c0db1fb27f03c8
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 // Copyright (c) 2011-2016 Google Inc.
4 // Use of this source code is governed by a BSD-style license that can be
5 // found in the gfx/skia/LICENSE file.
7 #include "SkConvolver.h"
8 #include "mozilla/Vector.h"
10 #ifdef USE_SSE2
11 # include "mozilla/SSE.h"
12 #endif
14 #ifdef USE_NEON
15 # include "mozilla/arm.h"
16 #endif
18 namespace skia {
20 // Converts the argument to an 8-bit unsigned value by clamping to the range
21 // 0-255.
22 static inline unsigned char ClampTo8(int a) {
23 if (static_cast<unsigned>(a) < 256) {
24 return a; // Avoid the extra check in the common case.
26 if (a < 0) {
27 return 0;
29 return 255;
32 // Convolves horizontally along a single row. The row data is given in
33 // |srcData| and continues for the numValues() of the filter.
34 template <bool hasAlpha>
35 void ConvolveHorizontally(const unsigned char* srcData,
36 const SkConvolutionFilter1D& filter,
37 unsigned char* outRow) {
38 // Loop over each pixel on this row in the output image.
39 int numValues = filter.numValues();
40 for (int outX = 0; outX < numValues; outX++) {
41 // Get the filter that determines the current output pixel.
42 int filterOffset, filterLength;
43 const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
44 filter.FilterForValue(outX, &filterOffset, &filterLength);
46 // Compute the first pixel in this row that the filter affects. It will
47 // touch |filterLength| pixels (4 bytes each) after this.
48 const unsigned char* rowToFilter = &srcData[filterOffset * 4];
50 // Apply the filter to the row to get the destination pixel in |accum|.
51 int accum[4] = {0};
52 for (int filterX = 0; filterX < filterLength; filterX++) {
53 SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterX];
54 accum[0] += curFilter * rowToFilter[filterX * 4 + 0];
55 accum[1] += curFilter * rowToFilter[filterX * 4 + 1];
56 accum[2] += curFilter * rowToFilter[filterX * 4 + 2];
57 if (hasAlpha) {
58 accum[3] += curFilter * rowToFilter[filterX * 4 + 3];
62 // Bring this value back in range. All of the filter scaling factors
63 // are in fixed point with kShiftBits bits of fractional part.
64 accum[0] >>= SkConvolutionFilter1D::kShiftBits;
65 accum[1] >>= SkConvolutionFilter1D::kShiftBits;
66 accum[2] >>= SkConvolutionFilter1D::kShiftBits;
68 if (hasAlpha) {
69 accum[3] >>= SkConvolutionFilter1D::kShiftBits;
72 // Store the new pixel.
73 outRow[outX * 4 + 0] = ClampTo8(accum[0]);
74 outRow[outX * 4 + 1] = ClampTo8(accum[1]);
75 outRow[outX * 4 + 2] = ClampTo8(accum[2]);
76 if (hasAlpha) {
77 outRow[outX * 4 + 3] = ClampTo8(accum[3]);
82 // Does vertical convolution to produce one output row. The filter values and
83 // length are given in the first two parameters. These are applied to each
84 // of the rows pointed to in the |sourceDataRows| array, with each row
85 // being |pixelWidth| wide.
87 // The output must have room for |pixelWidth * 4| bytes.
88 template <bool hasAlpha>
89 void ConvolveVertically(
90 const SkConvolutionFilter1D::ConvolutionFixed* filterValues,
91 int filterLength, unsigned char* const* sourceDataRows, int pixelWidth,
92 unsigned char* outRow) {
93 // We go through each column in the output and do a vertical convolution,
94 // generating one output pixel each time.
95 for (int outX = 0; outX < pixelWidth; outX++) {
96 // Compute the number of bytes over in each row that the current column
97 // we're convolving starts at. The pixel will cover the next 4 bytes.
98 int byteOffset = outX * 4;
100 // Apply the filter to one column of pixels.
101 int accum[4] = {0};
102 for (int filterY = 0; filterY < filterLength; filterY++) {
103 SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterY];
104 accum[0] += curFilter * sourceDataRows[filterY][byteOffset + 0];
105 accum[1] += curFilter * sourceDataRows[filterY][byteOffset + 1];
106 accum[2] += curFilter * sourceDataRows[filterY][byteOffset + 2];
107 if (hasAlpha) {
108 accum[3] += curFilter * sourceDataRows[filterY][byteOffset + 3];
112 // Bring this value back in range. All of the filter scaling factors
113 // are in fixed point with kShiftBits bits of precision.
114 accum[0] >>= SkConvolutionFilter1D::kShiftBits;
115 accum[1] >>= SkConvolutionFilter1D::kShiftBits;
116 accum[2] >>= SkConvolutionFilter1D::kShiftBits;
117 if (hasAlpha) {
118 accum[3] >>= SkConvolutionFilter1D::kShiftBits;
121 // Store the new pixel.
122 outRow[byteOffset + 0] = ClampTo8(accum[0]);
123 outRow[byteOffset + 1] = ClampTo8(accum[1]);
124 outRow[byteOffset + 2] = ClampTo8(accum[2]);
126 if (hasAlpha) {
127 unsigned char alpha = ClampTo8(accum[3]);
129 // Make sure the alpha channel doesn't come out smaller than any of the
130 // color channels. We use premultipled alpha channels, so this should
131 // never happen, but rounding errors will cause this from time to time.
132 // These "impossible" colors will cause overflows (and hence random pixel
133 // values) when the resulting bitmap is drawn to the screen.
135 // We only need to do this when generating the final output row (here).
136 int maxColorChannel =
137 std::max(outRow[byteOffset + 0],
138 std::max(outRow[byteOffset + 1], outRow[byteOffset + 2]));
139 if (alpha < maxColorChannel) {
140 outRow[byteOffset + 3] = maxColorChannel;
141 } else {
142 outRow[byteOffset + 3] = alpha;
144 } else {
145 // No alpha channel, the image is opaque.
146 outRow[byteOffset + 3] = 0xff;
151 #ifdef USE_SSE2
152 void convolve_vertically_avx2(const int16_t* filter, int filterLen,
153 uint8_t* const* srcRows, int width, uint8_t* out,
154 bool hasAlpha);
155 void convolve_horizontally_sse2(const unsigned char* srcData,
156 const SkConvolutionFilter1D& filter,
157 unsigned char* outRow, bool hasAlpha);
158 void convolve_vertically_sse2(const int16_t* filter, int filterLen,
159 uint8_t* const* srcRows, int width, uint8_t* out,
160 bool hasAlpha);
161 #elif defined(USE_NEON)
162 void convolve_horizontally_neon(const unsigned char* srcData,
163 const SkConvolutionFilter1D& filter,
164 unsigned char* outRow, bool hasAlpha);
165 void convolve_vertically_neon(const int16_t* filter, int filterLen,
166 uint8_t* const* srcRows, int width, uint8_t* out,
167 bool hasAlpha);
168 #endif
170 void convolve_horizontally(const unsigned char* srcData,
171 const SkConvolutionFilter1D& filter,
172 unsigned char* outRow, bool hasAlpha) {
173 #ifdef USE_SSE2
174 if (mozilla::supports_sse2()) {
175 convolve_horizontally_sse2(srcData, filter, outRow, hasAlpha);
176 return;
178 #elif defined(USE_NEON)
179 if (mozilla::supports_neon()) {
180 convolve_horizontally_neon(srcData, filter, outRow, hasAlpha);
181 return;
183 #endif
184 if (hasAlpha) {
185 ConvolveHorizontally<true>(srcData, filter, outRow);
186 } else {
187 ConvolveHorizontally<false>(srcData, filter, outRow);
191 void convolve_vertically(
192 const SkConvolutionFilter1D::ConvolutionFixed* filterValues,
193 int filterLength, unsigned char* const* sourceDataRows, int pixelWidth,
194 unsigned char* outRow, bool hasAlpha) {
195 #ifdef USE_SSE2
196 if (mozilla::supports_avx2()) {
197 convolve_vertically_avx2(filterValues, filterLength, sourceDataRows,
198 pixelWidth, outRow, hasAlpha);
199 return;
201 if (mozilla::supports_sse2()) {
202 convolve_vertically_sse2(filterValues, filterLength, sourceDataRows,
203 pixelWidth, outRow, hasAlpha);
204 return;
206 #elif defined(USE_NEON)
207 if (mozilla::supports_neon()) {
208 convolve_vertically_neon(filterValues, filterLength, sourceDataRows,
209 pixelWidth, outRow, hasAlpha);
210 return;
212 #endif
213 if (hasAlpha) {
214 ConvolveVertically<true>(filterValues, filterLength, sourceDataRows,
215 pixelWidth, outRow);
216 } else {
217 ConvolveVertically<false>(filterValues, filterLength, sourceDataRows,
218 pixelWidth, outRow);
222 // Stores a list of rows in a circular buffer. The usage is you write into it
223 // by calling AdvanceRow. It will keep track of which row in the buffer it
224 // should use next, and the total number of rows added.
225 class CircularRowBuffer {
226 public:
227 // The number of pixels in each row is given in |sourceRowPixelWidth|.
228 // The maximum number of rows needed in the buffer is |maxYFilterSize|
229 // (we only need to store enough rows for the biggest filter).
231 // We use the |firstInputRow| to compute the coordinates of all of the
232 // following rows returned by Advance().
233 CircularRowBuffer(int destRowPixelWidth, int maxYFilterSize,
234 int firstInputRow)
235 : fRowByteWidth(destRowPixelWidth * 4),
236 fNumRows(maxYFilterSize),
237 fNextRow(0),
238 fNextRowCoordinate(firstInputRow) {
239 fBuffer.resize(fRowByteWidth * maxYFilterSize);
240 fRowAddresses.resize(fNumRows);
243 // Moves to the next row in the buffer, returning a pointer to the beginning
244 // of it.
245 unsigned char* advanceRow() {
246 unsigned char* row = &fBuffer[fNextRow * fRowByteWidth];
247 fNextRowCoordinate++;
249 // Set the pointer to the next row to use, wrapping around if necessary.
250 fNextRow++;
251 if (fNextRow == fNumRows) {
252 fNextRow = 0;
254 return row;
257 // Returns a pointer to an "unrolled" array of rows. These rows will start
258 // at the y coordinate placed into |*firstRowIndex| and will continue in
259 // order for the maximum number of rows in this circular buffer.
261 // The |firstRowIndex_| may be negative. This means the circular buffer
262 // starts before the top of the image (it hasn't been filled yet).
263 unsigned char* const* GetRowAddresses(int* firstRowIndex) {
264 // Example for a 4-element circular buffer holding coords 6-9.
265 // Row 0 Coord 8
266 // Row 1 Coord 9
267 // Row 2 Coord 6 <- fNextRow = 2, fNextRowCoordinate = 10.
268 // Row 3 Coord 7
270 // The "next" row is also the first (lowest) coordinate. This computation
271 // may yield a negative value, but that's OK, the math will work out
272 // since the user of this buffer will compute the offset relative
273 // to the firstRowIndex and the negative rows will never be used.
274 *firstRowIndex = fNextRowCoordinate - fNumRows;
276 int curRow = fNextRow;
277 for (int i = 0; i < fNumRows; i++) {
278 fRowAddresses[i] = &fBuffer[curRow * fRowByteWidth];
280 // Advance to the next row, wrapping if necessary.
281 curRow++;
282 if (curRow == fNumRows) {
283 curRow = 0;
286 return &fRowAddresses[0];
289 private:
290 // The buffer storing the rows. They are packed, each one fRowByteWidth.
291 std::vector<unsigned char> fBuffer;
293 // Number of bytes per row in the |buffer|.
294 int fRowByteWidth;
296 // The number of rows available in the buffer.
297 int fNumRows;
299 // The next row index we should write into. This wraps around as the
300 // circular buffer is used.
301 int fNextRow;
303 // The y coordinate of the |fNextRow|. This is incremented each time a
304 // new row is appended and does not wrap.
305 int fNextRowCoordinate;
307 // Buffer used by GetRowAddresses().
308 std::vector<unsigned char*> fRowAddresses;
311 SkConvolutionFilter1D::SkConvolutionFilter1D() : fMaxFilter(0) {}
313 SkConvolutionFilter1D::~SkConvolutionFilter1D() = default;
315 void SkConvolutionFilter1D::AddFilter(int filterOffset,
316 const ConvolutionFixed* filterValues,
317 int filterLength) {
318 // It is common for leading/trailing filter values to be zeros. In such
319 // cases it is beneficial to only store the central factors.
320 // For a scaling to 1/4th in each dimension using a Lanczos-2 filter on
321 // a 1080p image this optimization gives a ~10% speed improvement.
322 int filterSize = filterLength;
323 int firstNonZero = 0;
324 while (firstNonZero < filterLength && filterValues[firstNonZero] == 0) {
325 firstNonZero++;
328 if (firstNonZero < filterLength) {
329 // Here we have at least one non-zero factor.
330 int lastNonZero = filterLength - 1;
331 while (lastNonZero >= 0 && filterValues[lastNonZero] == 0) {
332 lastNonZero--;
335 filterOffset += firstNonZero;
336 filterLength = lastNonZero + 1 - firstNonZero;
337 MOZ_ASSERT(filterLength > 0);
339 fFilterValues.insert(fFilterValues.end(), &filterValues[firstNonZero],
340 &filterValues[lastNonZero + 1]);
341 } else {
342 // Here all the factors were zeroes.
343 filterLength = 0;
346 FilterInstance instance = {
347 // We pushed filterLength elements onto fFilterValues
348 int(fFilterValues.size()) - filterLength, filterOffset, filterLength,
349 filterSize};
350 fFilters.push_back(instance);
352 fMaxFilter = std::max(fMaxFilter, filterLength);
355 bool SkConvolutionFilter1D::ComputeFilterValues(
356 const SkBitmapFilter& aBitmapFilter, int32_t aSrcSize, int32_t aDstSize) {
357 // When we're doing a magnification, the scale will be larger than one. This
358 // means the destination pixels are much smaller than the source pixels, and
359 // that the range covered by the filter won't necessarily cover any source
360 // pixel boundaries. Therefore, we use these clamped values (max of 1) for
361 // some computations.
362 float scale = float(aDstSize) / float(aSrcSize);
363 float clampedScale = std::min(1.0f, scale);
364 // This is how many source pixels from the center we need to count
365 // to support the filtering function.
366 float srcSupport = aBitmapFilter.width() / clampedScale;
367 float invScale = 1.0f / scale;
369 mozilla::Vector<float, 64> filterValues;
370 mozilla::Vector<ConvolutionFixed, 64> fixedFilterValues;
372 // Loop over all pixels in the output range. We will generate one set of
373 // filter values for each one. Those values will tell us how to blend the
374 // source pixels to compute the destination pixel.
376 // This value is computed based on how SkTDArray::resizeStorageToAtLeast works
377 // in order to ensure that it does not overflow or assert. That functions
378 // computes
379 // n+4 + (n+4)/4
380 // and we want to to fit in a 32 bit signed int. Equating that to 2^31-1 and
381 // solving n gives n = (2^31-6)*4/5 = 1717986913.6
382 const int32_t maxToPassToReserveAdditional = 1717986913;
384 int32_t filterValueCount = int32_t(ceilf(aDstSize * srcSupport * 2));
385 if (aDstSize > maxToPassToReserveAdditional || filterValueCount < 0 ||
386 filterValueCount > maxToPassToReserveAdditional) {
387 return false;
389 reserveAdditional(aDstSize, filterValueCount);
390 for (int32_t destI = 0; destI < aDstSize; destI++) {
391 // This is the pixel in the source directly under the pixel in the dest.
392 // Note that we base computations on the "center" of the pixels. To see
393 // why, observe that the destination pixel at coordinates (0, 0) in a 5.0x
394 // downscale should "cover" the pixels around the pixel with *its center*
395 // at coordinates (2.5, 2.5) in the source, not those around (0, 0).
396 // Hence we need to scale coordinates (0.5, 0.5), not (0, 0).
397 float srcPixel = (static_cast<float>(destI) + 0.5f) * invScale;
399 // Compute the (inclusive) range of source pixels the filter covers.
400 float srcBegin = std::max(0.0f, floorf(srcPixel - srcSupport));
401 float srcEnd = std::min(aSrcSize - 1.0f, ceilf(srcPixel + srcSupport));
403 // Compute the unnormalized filter value at each location of the source
404 // it covers.
406 // Sum of the filter values for normalizing.
407 // Distance from the center of the filter, this is the filter coordinate
408 // in source space. We also need to consider the center of the pixel
409 // when comparing distance against 'srcPixel'. In the 5x downscale
410 // example used above the distance from the center of the filter to
411 // the pixel with coordinates (2, 2) should be 0, because its center
412 // is at (2.5, 2.5).
413 int32_t filterCount = int32_t(srcEnd - srcBegin) + 1;
414 if (filterCount <= 0 || !filterValues.resize(filterCount) ||
415 !fixedFilterValues.resize(filterCount)) {
416 return false;
419 float destFilterDist = (srcBegin + 0.5f - srcPixel) * clampedScale;
420 float filterSum = 0.0f;
421 for (int32_t index = 0; index < filterCount; index++) {
422 float filterValue = aBitmapFilter.evaluate(destFilterDist);
423 filterValues[index] = filterValue;
424 filterSum += filterValue;
425 destFilterDist += clampedScale;
428 // The filter must be normalized so that we don't affect the brightness of
429 // the image. Convert to normalized fixed point.
430 ConvolutionFixed fixedSum = 0;
431 float invFilterSum = 1.0f / filterSum;
432 for (int32_t fixedI = 0; fixedI < filterCount; fixedI++) {
433 ConvolutionFixed curFixed = ToFixed(filterValues[fixedI] * invFilterSum);
434 fixedSum += curFixed;
435 fixedFilterValues[fixedI] = curFixed;
438 // The conversion to fixed point will leave some rounding errors, which
439 // we add back in to avoid affecting the brightness of the image. We
440 // arbitrarily add this to the center of the filter array (this won't always
441 // be the center of the filter function since it could get clipped on the
442 // edges, but it doesn't matter enough to worry about that case).
443 ConvolutionFixed leftovers = ToFixed(1) - fixedSum;
444 fixedFilterValues[filterCount / 2] += leftovers;
446 AddFilter(int32_t(srcBegin), fixedFilterValues.begin(), filterCount);
449 return maxFilter() > 0 && numValues() == aDstSize;
452 // Does a two-dimensional convolution on the given source image.
454 // It is assumed the source pixel offsets referenced in the input filters
455 // reference only valid pixels, so the source image size is not required. Each
456 // row of the source image starts |sourceByteRowStride| after the previous
457 // one (this allows you to have rows with some padding at the end).
459 // The result will be put into the given output buffer. The destination image
460 // size will be xfilter.numValues() * yfilter.numValues() pixels. It will be
461 // in rows of exactly xfilter.numValues() * 4 bytes.
463 // |sourceHasAlpha| is a hint that allows us to avoid doing computations on
464 // the alpha channel if the image is opaque. If you don't know, set this to
465 // true and it will work properly, but setting this to false will be a few
466 // percent faster if you know the image is opaque.
468 // The layout in memory is assumed to be 4-bytes per pixel in B-G-R-A order
469 // (this is ARGB when loaded into 32-bit words on a little-endian machine).
471 * Returns false if it was unable to perform the convolution/rescale. in which
472 * case the output buffer is assumed to be undefined.
474 bool BGRAConvolve2D(const unsigned char* sourceData, int sourceByteRowStride,
475 bool sourceHasAlpha, const SkConvolutionFilter1D& filterX,
476 const SkConvolutionFilter1D& filterY,
477 int outputByteRowStride, unsigned char* output) {
478 int maxYFilterSize = filterY.maxFilter();
480 // The next row in the input that we will generate a horizontally
481 // convolved row for. If the filter doesn't start at the beginning of the
482 // image (this is the case when we are only resizing a subset), then we
483 // don't want to generate any output rows before that. Compute the starting
484 // row for convolution as the first pixel for the first vertical filter.
485 int filterOffset = 0, filterLength = 0;
486 const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
487 filterY.FilterForValue(0, &filterOffset, &filterLength);
488 int nextXRow = filterOffset;
490 // We loop over each row in the input doing a horizontal convolution. This
491 // will result in a horizontally convolved image. We write the results into
492 // a circular buffer of convolved rows and do vertical convolution as rows
493 // are available. This prevents us from having to store the entire
494 // intermediate image and helps cache coherency.
495 // We will need four extra rows to allow horizontal convolution could be done
496 // simultaneously. We also pad each row in row buffer to be aligned-up to
497 // 32 bytes.
498 // TODO(jiesun): We do not use aligned load from row buffer in vertical
499 // convolution pass yet. Somehow Windows does not like it.
500 int rowBufferWidth = (filterX.numValues() + 31) & ~0x1F;
501 int rowBufferHeight = maxYFilterSize;
503 // check for too-big allocation requests : crbug.com/528628
505 int64_t size = int64_t(rowBufferWidth) * int64_t(rowBufferHeight);
506 // need some limit, to avoid over-committing success from malloc, but then
507 // crashing when we try to actually use the memory.
508 // 100meg seems big enough to allow "normal" zoom factors and image sizes
509 // through while avoiding the crash seen by the bug (crbug.com/528628)
510 if (size > 100 * 1024 * 1024) {
511 // printf_stderr("BGRAConvolve2D: tmp allocation [%lld] too
512 // big\n", size);
513 return false;
517 CircularRowBuffer rowBuffer(rowBufferWidth, rowBufferHeight, filterOffset);
519 // Loop over every possible output row, processing just enough horizontal
520 // convolutions to run each subsequent vertical convolution.
521 MOZ_ASSERT(outputByteRowStride >= filterX.numValues() * 4);
522 int numOutputRows = filterY.numValues();
524 // We need to check which is the last line to convolve before we advance 4
525 // lines in one iteration.
526 int lastFilterOffset, lastFilterLength;
527 filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset,
528 &lastFilterLength);
530 for (int outY = 0; outY < numOutputRows; outY++) {
531 filterValues = filterY.FilterForValue(outY, &filterOffset, &filterLength);
533 // Generate output rows until we have enough to run the current filter.
534 while (nextXRow < filterOffset + filterLength) {
535 convolve_horizontally(
536 &sourceData[(uint64_t)nextXRow * sourceByteRowStride], filterX,
537 rowBuffer.advanceRow(), sourceHasAlpha);
538 nextXRow++;
541 // Compute where in the output image this row of final data will go.
542 unsigned char* curOutputRow = &output[(uint64_t)outY * outputByteRowStride];
544 // Get the list of rows that the circular buffer has, in order.
545 int firstRowInCircularBuffer;
546 unsigned char* const* rowsToConvolve =
547 rowBuffer.GetRowAddresses(&firstRowInCircularBuffer);
549 // Now compute the start of the subset of those rows that the filter needs.
550 unsigned char* const* firstRowForFilter =
551 &rowsToConvolve[filterOffset - firstRowInCircularBuffer];
553 convolve_vertically(filterValues, filterLength, firstRowForFilter,
554 filterX.numValues(), curOutputRow, sourceHasAlpha);
556 return true;
559 } // namespace skia