1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 // Copyright (c) 2011-2016 Google Inc.
4 // Use of this source code is governed by a BSD-style license that can be
5 // found in the gfx/skia/LICENSE file.
7 #include "SkConvolver.h"
8 #include "mozilla/Vector.h"
11 # include "mozilla/SSE.h"
15 # include "mozilla/arm.h"
20 // Converts the argument to an 8-bit unsigned value by clamping to the range
22 static inline unsigned char ClampTo8(int a
) {
23 if (static_cast<unsigned>(a
) < 256) {
24 return a
; // Avoid the extra check in the common case.
32 // Convolves horizontally along a single row. The row data is given in
33 // |srcData| and continues for the numValues() of the filter.
34 template <bool hasAlpha
>
35 void ConvolveHorizontally(const unsigned char* srcData
,
36 const SkConvolutionFilter1D
& filter
,
37 unsigned char* outRow
) {
38 // Loop over each pixel on this row in the output image.
39 int numValues
= filter
.numValues();
40 for (int outX
= 0; outX
< numValues
; outX
++) {
41 // Get the filter that determines the current output pixel.
42 int filterOffset
, filterLength
;
43 const SkConvolutionFilter1D::ConvolutionFixed
* filterValues
=
44 filter
.FilterForValue(outX
, &filterOffset
, &filterLength
);
46 // Compute the first pixel in this row that the filter affects. It will
47 // touch |filterLength| pixels (4 bytes each) after this.
48 const unsigned char* rowToFilter
= &srcData
[filterOffset
* 4];
50 // Apply the filter to the row to get the destination pixel in |accum|.
52 for (int filterX
= 0; filterX
< filterLength
; filterX
++) {
53 SkConvolutionFilter1D::ConvolutionFixed curFilter
= filterValues
[filterX
];
54 accum
[0] += curFilter
* rowToFilter
[filterX
* 4 + 0];
55 accum
[1] += curFilter
* rowToFilter
[filterX
* 4 + 1];
56 accum
[2] += curFilter
* rowToFilter
[filterX
* 4 + 2];
58 accum
[3] += curFilter
* rowToFilter
[filterX
* 4 + 3];
62 // Bring this value back in range. All of the filter scaling factors
63 // are in fixed point with kShiftBits bits of fractional part.
64 accum
[0] >>= SkConvolutionFilter1D::kShiftBits
;
65 accum
[1] >>= SkConvolutionFilter1D::kShiftBits
;
66 accum
[2] >>= SkConvolutionFilter1D::kShiftBits
;
69 accum
[3] >>= SkConvolutionFilter1D::kShiftBits
;
72 // Store the new pixel.
73 outRow
[outX
* 4 + 0] = ClampTo8(accum
[0]);
74 outRow
[outX
* 4 + 1] = ClampTo8(accum
[1]);
75 outRow
[outX
* 4 + 2] = ClampTo8(accum
[2]);
77 outRow
[outX
* 4 + 3] = ClampTo8(accum
[3]);
82 // Does vertical convolution to produce one output row. The filter values and
83 // length are given in the first two parameters. These are applied to each
84 // of the rows pointed to in the |sourceDataRows| array, with each row
85 // being |pixelWidth| wide.
87 // The output must have room for |pixelWidth * 4| bytes.
88 template <bool hasAlpha
>
89 void ConvolveVertically(
90 const SkConvolutionFilter1D::ConvolutionFixed
* filterValues
,
91 int filterLength
, unsigned char* const* sourceDataRows
, int pixelWidth
,
92 unsigned char* outRow
) {
93 // We go through each column in the output and do a vertical convolution,
94 // generating one output pixel each time.
95 for (int outX
= 0; outX
< pixelWidth
; outX
++) {
96 // Compute the number of bytes over in each row that the current column
97 // we're convolving starts at. The pixel will cover the next 4 bytes.
98 int byteOffset
= outX
* 4;
100 // Apply the filter to one column of pixels.
102 for (int filterY
= 0; filterY
< filterLength
; filterY
++) {
103 SkConvolutionFilter1D::ConvolutionFixed curFilter
= filterValues
[filterY
];
104 accum
[0] += curFilter
* sourceDataRows
[filterY
][byteOffset
+ 0];
105 accum
[1] += curFilter
* sourceDataRows
[filterY
][byteOffset
+ 1];
106 accum
[2] += curFilter
* sourceDataRows
[filterY
][byteOffset
+ 2];
108 accum
[3] += curFilter
* sourceDataRows
[filterY
][byteOffset
+ 3];
112 // Bring this value back in range. All of the filter scaling factors
113 // are in fixed point with kShiftBits bits of precision.
114 accum
[0] >>= SkConvolutionFilter1D::kShiftBits
;
115 accum
[1] >>= SkConvolutionFilter1D::kShiftBits
;
116 accum
[2] >>= SkConvolutionFilter1D::kShiftBits
;
118 accum
[3] >>= SkConvolutionFilter1D::kShiftBits
;
121 // Store the new pixel.
122 outRow
[byteOffset
+ 0] = ClampTo8(accum
[0]);
123 outRow
[byteOffset
+ 1] = ClampTo8(accum
[1]);
124 outRow
[byteOffset
+ 2] = ClampTo8(accum
[2]);
127 unsigned char alpha
= ClampTo8(accum
[3]);
129 // Make sure the alpha channel doesn't come out smaller than any of the
130 // color channels. We use premultipled alpha channels, so this should
131 // never happen, but rounding errors will cause this from time to time.
132 // These "impossible" colors will cause overflows (and hence random pixel
133 // values) when the resulting bitmap is drawn to the screen.
135 // We only need to do this when generating the final output row (here).
136 int maxColorChannel
=
137 std::max(outRow
[byteOffset
+ 0],
138 std::max(outRow
[byteOffset
+ 1], outRow
[byteOffset
+ 2]));
139 if (alpha
< maxColorChannel
) {
140 outRow
[byteOffset
+ 3] = maxColorChannel
;
142 outRow
[byteOffset
+ 3] = alpha
;
145 // No alpha channel, the image is opaque.
146 outRow
[byteOffset
+ 3] = 0xff;
152 void convolve_vertically_avx2(const int16_t* filter
, int filterLen
,
153 uint8_t* const* srcRows
, int width
, uint8_t* out
,
155 void convolve_horizontally_sse2(const unsigned char* srcData
,
156 const SkConvolutionFilter1D
& filter
,
157 unsigned char* outRow
, bool hasAlpha
);
158 void convolve_vertically_sse2(const int16_t* filter
, int filterLen
,
159 uint8_t* const* srcRows
, int width
, uint8_t* out
,
161 #elif defined(USE_NEON)
162 void convolve_horizontally_neon(const unsigned char* srcData
,
163 const SkConvolutionFilter1D
& filter
,
164 unsigned char* outRow
, bool hasAlpha
);
165 void convolve_vertically_neon(const int16_t* filter
, int filterLen
,
166 uint8_t* const* srcRows
, int width
, uint8_t* out
,
170 void convolve_horizontally(const unsigned char* srcData
,
171 const SkConvolutionFilter1D
& filter
,
172 unsigned char* outRow
, bool hasAlpha
) {
174 if (mozilla::supports_sse2()) {
175 convolve_horizontally_sse2(srcData
, filter
, outRow
, hasAlpha
);
178 #elif defined(USE_NEON)
179 if (mozilla::supports_neon()) {
180 convolve_horizontally_neon(srcData
, filter
, outRow
, hasAlpha
);
185 ConvolveHorizontally
<true>(srcData
, filter
, outRow
);
187 ConvolveHorizontally
<false>(srcData
, filter
, outRow
);
191 void convolve_vertically(
192 const SkConvolutionFilter1D::ConvolutionFixed
* filterValues
,
193 int filterLength
, unsigned char* const* sourceDataRows
, int pixelWidth
,
194 unsigned char* outRow
, bool hasAlpha
) {
196 if (mozilla::supports_avx2()) {
197 convolve_vertically_avx2(filterValues
, filterLength
, sourceDataRows
,
198 pixelWidth
, outRow
, hasAlpha
);
201 if (mozilla::supports_sse2()) {
202 convolve_vertically_sse2(filterValues
, filterLength
, sourceDataRows
,
203 pixelWidth
, outRow
, hasAlpha
);
206 #elif defined(USE_NEON)
207 if (mozilla::supports_neon()) {
208 convolve_vertically_neon(filterValues
, filterLength
, sourceDataRows
,
209 pixelWidth
, outRow
, hasAlpha
);
214 ConvolveVertically
<true>(filterValues
, filterLength
, sourceDataRows
,
217 ConvolveVertically
<false>(filterValues
, filterLength
, sourceDataRows
,
222 // Stores a list of rows in a circular buffer. The usage is you write into it
223 // by calling AdvanceRow. It will keep track of which row in the buffer it
224 // should use next, and the total number of rows added.
225 class CircularRowBuffer
{
227 // The number of pixels in each row is given in |sourceRowPixelWidth|.
228 // The maximum number of rows needed in the buffer is |maxYFilterSize|
229 // (we only need to store enough rows for the biggest filter).
231 // We use the |firstInputRow| to compute the coordinates of all of the
232 // following rows returned by Advance().
233 CircularRowBuffer(int destRowPixelWidth
, int maxYFilterSize
,
235 : fRowByteWidth(destRowPixelWidth
* 4),
236 fNumRows(maxYFilterSize
),
238 fNextRowCoordinate(firstInputRow
) {
239 fBuffer
.resize(fRowByteWidth
* maxYFilterSize
);
240 fRowAddresses
.resize(fNumRows
);
243 // Moves to the next row in the buffer, returning a pointer to the beginning
245 unsigned char* advanceRow() {
246 unsigned char* row
= &fBuffer
[fNextRow
* fRowByteWidth
];
247 fNextRowCoordinate
++;
249 // Set the pointer to the next row to use, wrapping around if necessary.
251 if (fNextRow
== fNumRows
) {
257 // Returns a pointer to an "unrolled" array of rows. These rows will start
258 // at the y coordinate placed into |*firstRowIndex| and will continue in
259 // order for the maximum number of rows in this circular buffer.
261 // The |firstRowIndex_| may be negative. This means the circular buffer
262 // starts before the top of the image (it hasn't been filled yet).
263 unsigned char* const* GetRowAddresses(int* firstRowIndex
) {
264 // Example for a 4-element circular buffer holding coords 6-9.
267 // Row 2 Coord 6 <- fNextRow = 2, fNextRowCoordinate = 10.
270 // The "next" row is also the first (lowest) coordinate. This computation
271 // may yield a negative value, but that's OK, the math will work out
272 // since the user of this buffer will compute the offset relative
273 // to the firstRowIndex and the negative rows will never be used.
274 *firstRowIndex
= fNextRowCoordinate
- fNumRows
;
276 int curRow
= fNextRow
;
277 for (int i
= 0; i
< fNumRows
; i
++) {
278 fRowAddresses
[i
] = &fBuffer
[curRow
* fRowByteWidth
];
280 // Advance to the next row, wrapping if necessary.
282 if (curRow
== fNumRows
) {
286 return &fRowAddresses
[0];
290 // The buffer storing the rows. They are packed, each one fRowByteWidth.
291 std::vector
<unsigned char> fBuffer
;
293 // Number of bytes per row in the |buffer|.
296 // The number of rows available in the buffer.
299 // The next row index we should write into. This wraps around as the
300 // circular buffer is used.
303 // The y coordinate of the |fNextRow|. This is incremented each time a
304 // new row is appended and does not wrap.
305 int fNextRowCoordinate
;
307 // Buffer used by GetRowAddresses().
308 std::vector
<unsigned char*> fRowAddresses
;
311 SkConvolutionFilter1D::SkConvolutionFilter1D() : fMaxFilter(0) {}
313 SkConvolutionFilter1D::~SkConvolutionFilter1D() = default;
315 void SkConvolutionFilter1D::AddFilter(int filterOffset
,
316 const ConvolutionFixed
* filterValues
,
318 // It is common for leading/trailing filter values to be zeros. In such
319 // cases it is beneficial to only store the central factors.
320 // For a scaling to 1/4th in each dimension using a Lanczos-2 filter on
321 // a 1080p image this optimization gives a ~10% speed improvement.
322 int filterSize
= filterLength
;
323 int firstNonZero
= 0;
324 while (firstNonZero
< filterLength
&& filterValues
[firstNonZero
] == 0) {
328 if (firstNonZero
< filterLength
) {
329 // Here we have at least one non-zero factor.
330 int lastNonZero
= filterLength
- 1;
331 while (lastNonZero
>= 0 && filterValues
[lastNonZero
] == 0) {
335 filterOffset
+= firstNonZero
;
336 filterLength
= lastNonZero
+ 1 - firstNonZero
;
337 MOZ_ASSERT(filterLength
> 0);
339 fFilterValues
.insert(fFilterValues
.end(), &filterValues
[firstNonZero
],
340 &filterValues
[lastNonZero
+ 1]);
342 // Here all the factors were zeroes.
346 FilterInstance instance
= {
347 // We pushed filterLength elements onto fFilterValues
348 int(fFilterValues
.size()) - filterLength
, filterOffset
, filterLength
,
350 fFilters
.push_back(instance
);
352 fMaxFilter
= std::max(fMaxFilter
, filterLength
);
355 bool SkConvolutionFilter1D::ComputeFilterValues(
356 const SkBitmapFilter
& aBitmapFilter
, int32_t aSrcSize
, int32_t aDstSize
) {
357 // When we're doing a magnification, the scale will be larger than one. This
358 // means the destination pixels are much smaller than the source pixels, and
359 // that the range covered by the filter won't necessarily cover any source
360 // pixel boundaries. Therefore, we use these clamped values (max of 1) for
361 // some computations.
362 float scale
= float(aDstSize
) / float(aSrcSize
);
363 float clampedScale
= std::min(1.0f
, scale
);
364 // This is how many source pixels from the center we need to count
365 // to support the filtering function.
366 float srcSupport
= aBitmapFilter
.width() / clampedScale
;
367 float invScale
= 1.0f
/ scale
;
369 mozilla::Vector
<float, 64> filterValues
;
370 mozilla::Vector
<ConvolutionFixed
, 64> fixedFilterValues
;
372 // Loop over all pixels in the output range. We will generate one set of
373 // filter values for each one. Those values will tell us how to blend the
374 // source pixels to compute the destination pixel.
376 // This value is computed based on how SkTDArray::resizeStorageToAtLeast works
377 // in order to ensure that it does not overflow or assert. That functions
380 // and we want to to fit in a 32 bit signed int. Equating that to 2^31-1 and
381 // solving n gives n = (2^31-6)*4/5 = 1717986913.6
382 const int32_t maxToPassToReserveAdditional
= 1717986913;
384 int32_t filterValueCount
= int32_t(ceilf(aDstSize
* srcSupport
* 2));
385 if (aDstSize
> maxToPassToReserveAdditional
|| filterValueCount
< 0 ||
386 filterValueCount
> maxToPassToReserveAdditional
) {
389 reserveAdditional(aDstSize
, filterValueCount
);
390 for (int32_t destI
= 0; destI
< aDstSize
; destI
++) {
391 // This is the pixel in the source directly under the pixel in the dest.
392 // Note that we base computations on the "center" of the pixels. To see
393 // why, observe that the destination pixel at coordinates (0, 0) in a 5.0x
394 // downscale should "cover" the pixels around the pixel with *its center*
395 // at coordinates (2.5, 2.5) in the source, not those around (0, 0).
396 // Hence we need to scale coordinates (0.5, 0.5), not (0, 0).
397 float srcPixel
= (static_cast<float>(destI
) + 0.5f
) * invScale
;
399 // Compute the (inclusive) range of source pixels the filter covers.
400 float srcBegin
= std::max(0.0f
, floorf(srcPixel
- srcSupport
));
401 float srcEnd
= std::min(aSrcSize
- 1.0f
, ceilf(srcPixel
+ srcSupport
));
403 // Compute the unnormalized filter value at each location of the source
406 // Sum of the filter values for normalizing.
407 // Distance from the center of the filter, this is the filter coordinate
408 // in source space. We also need to consider the center of the pixel
409 // when comparing distance against 'srcPixel'. In the 5x downscale
410 // example used above the distance from the center of the filter to
411 // the pixel with coordinates (2, 2) should be 0, because its center
413 int32_t filterCount
= int32_t(srcEnd
- srcBegin
) + 1;
414 if (filterCount
<= 0 || !filterValues
.resize(filterCount
) ||
415 !fixedFilterValues
.resize(filterCount
)) {
419 float destFilterDist
= (srcBegin
+ 0.5f
- srcPixel
) * clampedScale
;
420 float filterSum
= 0.0f
;
421 for (int32_t index
= 0; index
< filterCount
; index
++) {
422 float filterValue
= aBitmapFilter
.evaluate(destFilterDist
);
423 filterValues
[index
] = filterValue
;
424 filterSum
+= filterValue
;
425 destFilterDist
+= clampedScale
;
428 // The filter must be normalized so that we don't affect the brightness of
429 // the image. Convert to normalized fixed point.
430 ConvolutionFixed fixedSum
= 0;
431 float invFilterSum
= 1.0f
/ filterSum
;
432 for (int32_t fixedI
= 0; fixedI
< filterCount
; fixedI
++) {
433 ConvolutionFixed curFixed
= ToFixed(filterValues
[fixedI
] * invFilterSum
);
434 fixedSum
+= curFixed
;
435 fixedFilterValues
[fixedI
] = curFixed
;
438 // The conversion to fixed point will leave some rounding errors, which
439 // we add back in to avoid affecting the brightness of the image. We
440 // arbitrarily add this to the center of the filter array (this won't always
441 // be the center of the filter function since it could get clipped on the
442 // edges, but it doesn't matter enough to worry about that case).
443 ConvolutionFixed leftovers
= ToFixed(1) - fixedSum
;
444 fixedFilterValues
[filterCount
/ 2] += leftovers
;
446 AddFilter(int32_t(srcBegin
), fixedFilterValues
.begin(), filterCount
);
449 return maxFilter() > 0 && numValues() == aDstSize
;
452 // Does a two-dimensional convolution on the given source image.
454 // It is assumed the source pixel offsets referenced in the input filters
455 // reference only valid pixels, so the source image size is not required. Each
456 // row of the source image starts |sourceByteRowStride| after the previous
457 // one (this allows you to have rows with some padding at the end).
459 // The result will be put into the given output buffer. The destination image
460 // size will be xfilter.numValues() * yfilter.numValues() pixels. It will be
461 // in rows of exactly xfilter.numValues() * 4 bytes.
463 // |sourceHasAlpha| is a hint that allows us to avoid doing computations on
464 // the alpha channel if the image is opaque. If you don't know, set this to
465 // true and it will work properly, but setting this to false will be a few
466 // percent faster if you know the image is opaque.
468 // The layout in memory is assumed to be 4-bytes per pixel in B-G-R-A order
469 // (this is ARGB when loaded into 32-bit words on a little-endian machine).
471 * Returns false if it was unable to perform the convolution/rescale. in which
472 * case the output buffer is assumed to be undefined.
474 bool BGRAConvolve2D(const unsigned char* sourceData
, int sourceByteRowStride
,
475 bool sourceHasAlpha
, const SkConvolutionFilter1D
& filterX
,
476 const SkConvolutionFilter1D
& filterY
,
477 int outputByteRowStride
, unsigned char* output
) {
478 int maxYFilterSize
= filterY
.maxFilter();
480 // The next row in the input that we will generate a horizontally
481 // convolved row for. If the filter doesn't start at the beginning of the
482 // image (this is the case when we are only resizing a subset), then we
483 // don't want to generate any output rows before that. Compute the starting
484 // row for convolution as the first pixel for the first vertical filter.
485 int filterOffset
= 0, filterLength
= 0;
486 const SkConvolutionFilter1D::ConvolutionFixed
* filterValues
=
487 filterY
.FilterForValue(0, &filterOffset
, &filterLength
);
488 int nextXRow
= filterOffset
;
490 // We loop over each row in the input doing a horizontal convolution. This
491 // will result in a horizontally convolved image. We write the results into
492 // a circular buffer of convolved rows and do vertical convolution as rows
493 // are available. This prevents us from having to store the entire
494 // intermediate image and helps cache coherency.
495 // We will need four extra rows to allow horizontal convolution could be done
496 // simultaneously. We also pad each row in row buffer to be aligned-up to
498 // TODO(jiesun): We do not use aligned load from row buffer in vertical
499 // convolution pass yet. Somehow Windows does not like it.
500 int rowBufferWidth
= (filterX
.numValues() + 31) & ~0x1F;
501 int rowBufferHeight
= maxYFilterSize
;
503 // check for too-big allocation requests : crbug.com/528628
505 int64_t size
= int64_t(rowBufferWidth
) * int64_t(rowBufferHeight
);
506 // need some limit, to avoid over-committing success from malloc, but then
507 // crashing when we try to actually use the memory.
508 // 100meg seems big enough to allow "normal" zoom factors and image sizes
509 // through while avoiding the crash seen by the bug (crbug.com/528628)
510 if (size
> 100 * 1024 * 1024) {
511 // printf_stderr("BGRAConvolve2D: tmp allocation [%lld] too
517 CircularRowBuffer
rowBuffer(rowBufferWidth
, rowBufferHeight
, filterOffset
);
519 // Loop over every possible output row, processing just enough horizontal
520 // convolutions to run each subsequent vertical convolution.
521 MOZ_ASSERT(outputByteRowStride
>= filterX
.numValues() * 4);
522 int numOutputRows
= filterY
.numValues();
524 // We need to check which is the last line to convolve before we advance 4
525 // lines in one iteration.
526 int lastFilterOffset
, lastFilterLength
;
527 filterY
.FilterForValue(numOutputRows
- 1, &lastFilterOffset
,
530 for (int outY
= 0; outY
< numOutputRows
; outY
++) {
531 filterValues
= filterY
.FilterForValue(outY
, &filterOffset
, &filterLength
);
533 // Generate output rows until we have enough to run the current filter.
534 while (nextXRow
< filterOffset
+ filterLength
) {
535 convolve_horizontally(
536 &sourceData
[(uint64_t)nextXRow
* sourceByteRowStride
], filterX
,
537 rowBuffer
.advanceRow(), sourceHasAlpha
);
541 // Compute where in the output image this row of final data will go.
542 unsigned char* curOutputRow
= &output
[(uint64_t)outY
* outputByteRowStride
];
544 // Get the list of rows that the circular buffer has, in order.
545 int firstRowInCircularBuffer
;
546 unsigned char* const* rowsToConvolve
=
547 rowBuffer
.GetRowAddresses(&firstRowInCircularBuffer
);
549 // Now compute the start of the subset of those rows that the filter needs.
550 unsigned char* const* firstRowForFilter
=
551 &rowsToConvolve
[filterOffset
- firstRowInCircularBuffer
];
553 convolve_vertically(filterValues
, filterLength
, firstRowForFilter
,
554 filterX
.numValues(), curOutputRow
, sourceHasAlpha
);