1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
13 #include "mozilla/CheckedInt.h"
14 #include "NumericTools.h"
17 #include "DataSurfaceHelpers.h"
21 # include "mozilla/arm.h"
28 * Helper function to process each row of the box blur.
29 * It takes care of transposing the data on input or output depending
30 * on whether we intend a horizontal or vertical blur, and whether we're
31 * reading from the initial source or writing to the final destination.
32 * It allows starting or ending anywhere within the row to accommodate
35 template <bool aTransposeInput
, bool aTransposeOutput
>
36 static inline void BoxBlurRow(const uint8_t* aInput
, uint8_t* aOutput
,
37 int32_t aLeftLobe
, int32_t aRightLobe
,
38 int32_t aWidth
, int32_t aStride
, int32_t aStart
,
40 // If the input or output is transposed, then we will move down a row
41 // for each step, instead of moving over a column. Since these values
42 // only depend on a template parameter, they will more easily get
43 // copy-propagated in the non-transposed case, which is why they
44 // are not passed as parameters.
45 const int32_t inputStep
= aTransposeInput
? aStride
: 1;
46 const int32_t outputStep
= aTransposeOutput
? aStride
: 1;
48 // We need to sample aLeftLobe pixels to the left and aRightLobe pixels
49 // to the right of the current position, then average them. So this is
50 // the size of the total width of this filter.
51 const int32_t boxSize
= aLeftLobe
+ aRightLobe
+ 1;
53 // Instead of dividing the pixel sum by boxSize to average, we can just
54 // compute a scale that will normalize the result so that it can be quickly
55 // shifted into the desired range.
56 const uint32_t reciprocal
= (1 << 24) / boxSize
;
58 // The shift would normally truncate the result, whereas we would rather
59 // prefer to round the result to the closest increment. By adding 0.5 units
60 // to the initial sum, we bias the sum so that it will be rounded by the
61 // truncation instead.
62 uint32_t alphaSum
= (boxSize
+ 1) / 2;
64 // We process the row with a moving filter, keeping a sum (alphaSum) of
65 // boxSize pixels. As we move over a pixel, we need to add on a pixel
66 // from the right extreme of the window that moved into range, and subtract
67 // off a pixel from the left extreme of the window that moved out of range.
68 // But first, we need to initialize alphaSum to the contents of
69 // the window before we can get going. If the window moves out of bounds
70 // of the row, we clamp each sample to be the closest pixel from within
71 // row bounds, so the 0th and aWidth-1th pixel.
72 int32_t initLeft
= aStart
- aLeftLobe
;
74 // If the left lobe samples before the row, add in clamped samples.
75 alphaSum
+= -initLeft
* aInput
[0];
78 int32_t initRight
= aStart
+ boxSize
- aLeftLobe
;
79 if (initRight
> aWidth
) {
80 // If the right lobe samples after the row, add in clamped samples.
81 alphaSum
+= (initRight
- aWidth
) * aInput
[(aWidth
- 1) * inputStep
];
84 // Finally, add in all the valid, non-clamped samples to fill up the
85 // rest of the window.
86 const uint8_t* src
= &aInput
[initLeft
* inputStep
];
87 const uint8_t* iterEnd
= &aInput
[initRight
* inputStep
];
93 // We unroll the per-pixel loop here substantially. The amount of work
94 // done per sample is so small that the cost of a loop condition check
95 // and a branch can substantially add to or even dominate the performance
97 while (src
+ 16 * inputStep
<= iterEnd
) {
115 while (src
< iterEnd
) {
119 // Now we start moving the window over the row. We will be accessing
120 // pixels from aStart - aLeftLobe up to aEnd + aRightLobe, which may be
121 // out of bounds of the row. To avoid having to check within the inner
122 // loops if we are in bound, we instead compute the points at which
123 // we will move out of bounds of the row on the left side (splitLeft)
124 // and right side (splitRight).
125 int32_t splitLeft
= std::min(std::max(aLeftLobe
, aStart
), aEnd
);
127 std::min(std::max(aWidth
- (boxSize
- aLeftLobe
), aStart
), aEnd
);
128 // If the filter window is actually larger than the size of the row,
129 // there will be a middle area of overlap where the leftmost and rightmost
130 // pixel of the filter will both be outside the row. In this case, we need
131 // to invert the splits so that splitLeft <= splitRight.
132 if (boxSize
> aWidth
) {
133 std::swap(splitLeft
, splitRight
);
136 // Process all pixels up to splitLeft that would sample before the start of
137 // the row. Note that because inputStep and outputStep may not be a const 1
138 // value, it is more performant to increment pointers here for the source and
139 // destination rather than use a loop counter, since doing so would entail an
140 // expensive multiplication that significantly slows down the loop.
141 uint8_t* dst
= &aOutput
[aStart
* outputStep
];
142 iterEnd
= &aOutput
[splitLeft
* outputStep
];
143 src
= &aInput
[(aStart
+ boxSize
- aLeftLobe
) * inputStep
];
144 uint8_t firstVal
= aInput
[0];
147 *dst = (alphaSum * reciprocal) >> 24; \
148 alphaSum += *src - firstVal; \
152 while (dst
+ 16 * outputStep
<= iterEnd
) {
170 while (dst
< iterEnd
) {
174 // Process all pixels between splitLeft and splitRight.
175 iterEnd
= &aOutput
[splitRight
* outputStep
];
176 if (boxSize
<= aWidth
) {
177 // The filter window is smaller than the row size, so the leftmost and
178 // rightmost samples are both within row bounds.
179 src
= &aInput
[(splitLeft
- aLeftLobe
) * inputStep
];
180 int32_t boxStep
= boxSize
* inputStep
;
182 #define CENTER_ITER \
183 *dst = (alphaSum * reciprocal) >> 24; \
184 alphaSum += src[boxStep] - *src; \
188 while (dst
+ 16 * outputStep
<= iterEnd
) {
206 while (dst
< iterEnd
) {
210 // The filter window is larger than the row size, and we're in the area of
211 // split overlap. So the leftmost and rightmost samples are both out of
212 // bounds and need to be clamped. We can just precompute the difference here
214 int32_t firstLastDiff
= aInput
[(aWidth
- 1) * inputStep
] - aInput
[0];
215 while (dst
< iterEnd
) {
216 *dst
= (alphaSum
* reciprocal
) >> 24;
217 alphaSum
+= firstLastDiff
;
222 // Process all remaining pixels after splitRight that would sample after the
224 iterEnd
= &aOutput
[aEnd
* outputStep
];
225 src
= &aInput
[(splitRight
- aLeftLobe
) * inputStep
];
226 uint8_t lastVal
= aInput
[(aWidth
- 1) * inputStep
];
229 *dst = (alphaSum * reciprocal) >> 24; \
230 alphaSum += lastVal - *src; \
234 while (dst
+ 16 * outputStep
<= iterEnd
) {
252 while (dst
< iterEnd
) {
258 * Box blur involves looking at one pixel, and setting its value to the average
259 * of its neighbouring pixels. This is meant to provide a 3-pass approximation
260 * of a Gaussian blur.
261 * @param aTranspose Whether to transpose the buffer when reading and writing
263 * @param aData The buffer to be blurred.
264 * @param aLobes The number of pixels to blend on the left and right for each of
266 * @param aWidth The number of columns in the buffers.
267 * @param aRows The number of rows in the buffers.
268 * @param aStride The stride of the buffer.
270 template <bool aTranspose
>
271 static void BoxBlur(uint8_t* aData
, const int32_t aLobes
[3][2], int32_t aWidth
,
272 int32_t aRows
, int32_t aStride
, IntRect aSkipRect
) {
274 std::swap(aWidth
, aRows
);
278 MOZ_ASSERT(aWidth
> 0);
280 // All three passes of the box blur that approximate the Gaussian are done
281 // on each row in turn, so we only need two temporary row buffers to process
282 // each row, instead of a full-sized buffer. Data moves from the source to the
283 // first temporary, from the first temporary to the second, then from the
284 // second back to the destination. This way is more cache-friendly than
285 // processing the whole buffer in each pass and thus yields a nice speedup.
286 uint8_t* tmpRow
= new (std::nothrow
) uint8_t[2 * aWidth
];
290 uint8_t* tmpRow2
= tmpRow
+ aWidth
;
292 const int32_t stride
= aTranspose
? 1 : aStride
;
293 bool skipRectCoversWholeRow
=
294 0 >= aSkipRect
.X() && aWidth
<= aSkipRect
.XMost();
296 for (int32_t y
= 0; y
< aRows
; y
++) {
297 // Check whether the skip rect intersects this row. If the skip
298 // rect covers the whole surface in this row, we can avoid
299 // this row entirely (and any others along the skip rect).
300 bool inSkipRectY
= aSkipRect
.ContainsY(y
);
301 if (inSkipRectY
&& skipRectCoversWholeRow
) {
302 aData
+= stride
* (aSkipRect
.YMost() - y
);
303 y
= aSkipRect
.YMost() - 1;
307 // Read in data from the source transposed if necessary.
308 BoxBlurRow
<aTranspose
, false>(aData
, tmpRow
, aLobes
[0][0], aLobes
[0][1],
309 aWidth
, aStride
, 0, aWidth
);
311 // For the middle pass, the data is already pre-transposed and does not need
312 // to be post-transposed yet.
313 BoxBlurRow
<false, false>(tmpRow
, tmpRow2
, aLobes
[1][0], aLobes
[1][1],
314 aWidth
, aStride
, 0, aWidth
);
316 // Write back data to the destination transposed if necessary too.
317 // Make sure not to overwrite the skip rect by only outputting to the
318 // destination before and after the skip rect, if requested.
320 inSkipRectY
? std::min(std::max(aSkipRect
.X(), 0), aWidth
) : aWidth
;
321 int32_t skipEnd
= std::max(skipStart
, aSkipRect
.XMost());
323 BoxBlurRow
<false, aTranspose
>(tmpRow2
, aData
, aLobes
[2][0], aLobes
[2][1],
324 aWidth
, aStride
, 0, skipStart
);
326 if (skipEnd
< aWidth
) {
327 BoxBlurRow
<false, aTranspose
>(tmpRow2
, aData
, aLobes
[2][0], aLobes
[2][1],
328 aWidth
, aStride
, skipEnd
, aWidth
);
337 static void ComputeLobes(int32_t aRadius
, int32_t aLobes
[3][2]) {
338 int32_t major
, minor
, final
;
340 /* See http://www.w3.org/TR/SVG/filters.html#feGaussianBlur for
341 * some notes about approximating the Gaussian blur with box-blurs.
342 * The comments below are in the terminology of that page.
344 int32_t z
= aRadius
/ 3;
345 switch (aRadius
% 3) {
347 // aRadius = z*3; choose d = 2*z + 1
348 major
= minor
= final
= z
;
352 // This is a tricky case since there is no value of d which will
353 // yield a radius of exactly aRadius. If d is odd, i.e. d=2*k + 1
354 // for some integer k, then the radius will be 3*k. If d is even,
355 // i.e. d=2*k, then the radius will be 3*k - 1.
356 // So we have to choose values that don't match the standard
362 // aRadius = z*3 + 2; choose d = 2*z + 2
363 major
= final
= z
+ 1;
367 // Mathematical impossibility!
369 major
= minor
= final
= 0;
371 MOZ_ASSERT(major
+ minor
+ final
== aRadius
);
373 aLobes
[0][0] = major
;
374 aLobes
[0][1] = minor
;
375 aLobes
[1][0] = minor
;
376 aLobes
[1][1] = major
;
377 aLobes
[2][0] = final
;
378 aLobes
[2][1] = final
;
381 static void SpreadHorizontal(uint8_t* aInput
, uint8_t* aOutput
, int32_t aRadius
,
382 int32_t aWidth
, int32_t aRows
, int32_t aStride
,
383 const IntRect
& aSkipRect
) {
385 memcpy(aOutput
, aInput
, aStride
* aRows
);
389 bool skipRectCoversWholeRow
=
390 0 >= aSkipRect
.X() && aWidth
<= aSkipRect
.XMost();
391 for (int32_t y
= 0; y
< aRows
; y
++) {
392 // Check whether the skip rect intersects this row. If the skip
393 // rect covers the whole surface in this row, we can avoid
394 // this row entirely (and any others along the skip rect).
395 bool inSkipRectY
= aSkipRect
.ContainsY(y
);
396 if (inSkipRectY
&& skipRectCoversWholeRow
) {
397 y
= aSkipRect
.YMost() - 1;
401 for (int32_t x
= 0; x
< aWidth
; x
++) {
402 // Check whether we are within the skip rect. If so, go
403 // to the next point outside the skip rect.
404 if (inSkipRectY
&& aSkipRect
.ContainsX(x
)) {
405 x
= aSkipRect
.XMost();
406 if (x
>= aWidth
) break;
409 int32_t sMin
= std::max(x
- aRadius
, 0);
410 int32_t sMax
= std::min(x
+ aRadius
, aWidth
- 1);
412 for (int32_t s
= sMin
; s
<= sMax
; ++s
) {
413 v
= std::max
<int32_t>(v
, aInput
[aStride
* y
+ s
]);
415 aOutput
[aStride
* y
+ x
] = v
;
420 static void SpreadVertical(uint8_t* aInput
, uint8_t* aOutput
, int32_t aRadius
,
421 int32_t aWidth
, int32_t aRows
, int32_t aStride
,
422 const IntRect
& aSkipRect
) {
424 memcpy(aOutput
, aInput
, aStride
* aRows
);
428 bool skipRectCoversWholeColumn
=
429 0 >= aSkipRect
.Y() && aRows
<= aSkipRect
.YMost();
430 for (int32_t x
= 0; x
< aWidth
; x
++) {
431 bool inSkipRectX
= aSkipRect
.ContainsX(x
);
432 if (inSkipRectX
&& skipRectCoversWholeColumn
) {
433 x
= aSkipRect
.XMost() - 1;
437 for (int32_t y
= 0; y
< aRows
; y
++) {
438 // Check whether we are within the skip rect. If so, go
439 // to the next point outside the skip rect.
440 if (inSkipRectX
&& aSkipRect
.ContainsY(y
)) {
441 y
= aSkipRect
.YMost();
442 if (y
>= aRows
) break;
445 int32_t sMin
= std::max(y
- aRadius
, 0);
446 int32_t sMax
= std::min(y
+ aRadius
, aRows
- 1);
448 for (int32_t s
= sMin
; s
<= sMax
; ++s
) {
449 v
= std::max
<int32_t>(v
, aInput
[aStride
* s
+ x
]);
451 aOutput
[aStride
* y
+ x
] = v
;
456 CheckedInt
<int32_t> AlphaBoxBlur::RoundUpToMultipleOf4(int32_t aVal
) {
457 CheckedInt
<int32_t> val(aVal
);
// Constructs an AlphaBoxBlur from a rect plus spread and blur radii.
//
// The initialiser list only zeroes mStride and mSurfaceAllocationSize; all
// five arguments are then forwarded, unchanged and in the same order, to
// Init(), which performs the actual setup.
//
// aDirtyRect and aSkipRect are passed as pointers and are dereferenced inside
// Init(). NOTE(review): presumably either may be null (optional) — the guards
// are not visible in this view, confirm against Init().
// NOTE(review): mHasDirtyRect is not in this initialiser list; Init() assigns
// it on the paths visible in this file — confirm it is set on every path.
466 AlphaBoxBlur::AlphaBoxBlur(const Rect
& aRect
, const IntSize
& aSpreadRadius
,
467 const IntSize
& aBlurRadius
, const Rect
* aDirtyRect
,
468 const Rect
* aSkipRect
)
469 : mStride(0), mSurfaceAllocationSize(0) {
470 Init(aRect
, aSpreadRadius
, aBlurRadius
, aDirtyRect
, aSkipRect
);
473 AlphaBoxBlur::AlphaBoxBlur()
474 : mStride(0), mSurfaceAllocationSize(0), mHasDirtyRect(false) {}
476 void AlphaBoxBlur::Init(const Rect
& aRect
, const IntSize
& aSpreadRadius
,
477 const IntSize
& aBlurRadius
, const Rect
* aDirtyRect
,
478 const Rect
* aSkipRect
) {
479 mSpreadRadius
= aSpreadRadius
;
480 mBlurRadius
= aBlurRadius
;
483 rect
.Inflate(Size(aBlurRadius
+ aSpreadRadius
));
487 // If we get passed a dirty rect from layout, we can minimize the
488 // shadow size and make painting faster.
489 mHasDirtyRect
= true;
490 mDirtyRect
= *aDirtyRect
;
491 Rect requiredBlurArea
= mDirtyRect
.Intersect(rect
);
492 requiredBlurArea
.Inflate(Size(aBlurRadius
+ aSpreadRadius
));
493 rect
= requiredBlurArea
.Intersect(rect
);
495 mHasDirtyRect
= false;
498 mRect
= TruncatedToInt(rect
);
499 if (mRect
.IsEmpty()) {
504 // If we get passed a skip rect, we can lower the amount of
505 // blurring/spreading we need to do. We convert it to IntRect to avoid
506 // expensive int<->float conversions if we were to use Rect instead.
507 Rect skipRect
= *aSkipRect
;
508 skipRect
.Deflate(Size(aBlurRadius
+ aSpreadRadius
));
509 mSkipRect
= RoundedIn(skipRect
);
510 mSkipRect
= mSkipRect
.Intersect(mRect
);
511 if (mSkipRect
.IsEqualInterior(mRect
)) {
515 mSkipRect
-= mRect
.TopLeft();
516 // Ensure the skip rect is 4-pixel-aligned in the x axis, so that all our
517 // accesses later are aligned as well, see bug 1622113.
518 mSkipRect
.SetLeftEdge(RoundUpToMultiple(mSkipRect
.X(), 4));
519 mSkipRect
.SetRightEdge(RoundDownToMultiple(mSkipRect
.XMost(), 4));
520 if (mSkipRect
.IsEmpty()) {
521 mSkipRect
= IntRect();
524 mSkipRect
= IntRect();
527 CheckedInt
<int32_t> stride
= RoundUpToMultipleOf4(mRect
.Width());
528 if (stride
.isValid()) {
529 mStride
= stride
.value();
531 // We need to leave room for an additional 3 bytes for a potential overrun
532 // in our blurring code.
533 size_t size
= BufferSizeFromStrideAndHeight(mStride
, mRect
.Height(), 3);
535 mSurfaceAllocationSize
= size
;
540 AlphaBoxBlur::AlphaBoxBlur(const Rect
& aRect
, int32_t aStride
, float aSigmaX
,
542 : mRect(TruncatedToInt(aRect
)),
544 mBlurRadius(CalculateBlurRadius(Point(aSigmaX
, aSigmaY
))),
546 mSurfaceAllocationSize(0),
547 mHasDirtyRect(false) {
549 if (aRect
.ToIntRect(&intRect
)) {
551 BufferSizeFromStrideAndHeight(intRect
.Width(), intRect
.Height());
552 if (minDataSize
!= 0) {
553 mSurfaceAllocationSize
= minDataSize
;
// Out-of-line defaulted destructor: no custom teardown is written here, so
// the compiler-generated member-wise destruction is used.
558 AlphaBoxBlur::~AlphaBoxBlur() = default;
// Builds an IntSize from the width and height of mRect.
// NOTE(review): the statement returning `size` and the closing brace are not
// visible in this view — presumably `size` is returned; confirm against the
// full file.
560 IntSize
AlphaBoxBlur::GetSize() const {
561 IntSize
size(mRect
.Width(), mRect
.Height());
565 int32_t AlphaBoxBlur::GetStride() const { return mStride
; }
567 IntRect
AlphaBoxBlur::GetRect() const { return mRect
; }
569 Rect
* AlphaBoxBlur::GetDirtyRect() {
// Returns the value stored in mSurfaceAllocationSize.
// The constructors in this file initialise that member to 0; Init() and the
// sigma-based constructor overwrite it with a size obtained from
// BufferSizeFromStrideAndHeight(). NOTE(review): the exact conditions under
// which it is overwritten are not fully visible in this view.
577 size_t AlphaBoxBlur::GetSurfaceAllocationSize() const {
578 return mSurfaceAllocationSize
;
581 void AlphaBoxBlur::Blur(uint8_t* aData
) const {
586 // no need to do all this if not blurring or spreading
587 if (mBlurRadius
!= IntSize(0, 0) || mSpreadRadius
!= IntSize(0, 0)) {
588 int32_t stride
= GetStride();
590 IntSize size
= GetSize();
592 if (mSpreadRadius
.width
> 0 || mSpreadRadius
.height
> 0) {
593 // No need to use CheckedInt here - we have validated it in the
595 size_t szB
= stride
* size
.height
;
596 uint8_t* tmpData
= new (std::nothrow
) uint8_t[szB
];
602 memset(tmpData
, 0, szB
);
604 SpreadHorizontal(aData
, tmpData
, mSpreadRadius
.width
, size
.width
,
605 size
.height
, stride
, mSkipRect
);
606 SpreadVertical(tmpData
, aData
, mSpreadRadius
.height
, size
.width
,
607 size
.height
, stride
, mSkipRect
);
612 int32_t horizontalLobes
[3][2];
613 ComputeLobes(mBlurRadius
.width
, horizontalLobes
);
614 int32_t verticalLobes
[3][2];
615 ComputeLobes(mBlurRadius
.height
, verticalLobes
);
617 // We want to allow for some extra space on the left for alignment reasons.
618 int32_t maxLeftLobe
=
619 RoundUpToMultipleOf4(horizontalLobes
[0][0] + 1).value();
621 IntSize
integralImageSize(
622 size
.width
+ maxLeftLobe
+ horizontalLobes
[1][1],
623 size
.height
+ verticalLobes
[0][0] + verticalLobes
[1][1] + 1);
625 if ((integralImageSize
.width
* integralImageSize
.height
) > (1 << 24)) {
626 // Fallback to old blurring code when the surface is so large it may
627 // overflow our integral image!
628 if (mBlurRadius
.width
> 0) {
629 BoxBlur
<false>(aData
, horizontalLobes
, size
.width
, size
.height
, stride
,
632 if (mBlurRadius
.height
> 0) {
633 BoxBlur
<true>(aData
, verticalLobes
, size
.width
, size
.height
, stride
,
637 size_t integralImageStride
=
638 GetAlignedStride
<16>(integralImageSize
.width
, 4);
639 if (integralImageStride
== 0) {
643 // We need to leave room for an additional 12 bytes for a maximum overrun
644 // of 3 pixels in the blurring code.
645 size_t bufLen
= BufferSizeFromStrideAndHeight(
646 integralImageStride
, integralImageSize
.height
, 12);
650 // bufLen is a byte count, but here we want a multiple of 32-bit ints, so
652 AlignedArray
<uint32_t> integralImage((bufLen
/ 4) +
653 ((bufLen
% 4) ? 1 : 0));
655 if (!integralImage
) {
660 if (Factory::HasSSE2()) {
661 BoxBlur_SSE2(aData
, horizontalLobes
[0][0], horizontalLobes
[0][1],
662 verticalLobes
[0][0], verticalLobes
[0][1], integralImage
,
663 integralImageStride
);
664 BoxBlur_SSE2(aData
, horizontalLobes
[1][0], horizontalLobes
[1][1],
665 verticalLobes
[1][0], verticalLobes
[1][1], integralImage
,
666 integralImageStride
);
667 BoxBlur_SSE2(aData
, horizontalLobes
[2][0], horizontalLobes
[2][1],
668 verticalLobes
[2][0], verticalLobes
[2][1], integralImage
,
669 integralImageStride
);
673 if (mozilla::supports_neon()) {
674 BoxBlur_NEON(aData
, horizontalLobes
[0][0], horizontalLobes
[0][1],
675 verticalLobes
[0][0], verticalLobes
[0][1], integralImage
,
676 integralImageStride
);
677 BoxBlur_NEON(aData
, horizontalLobes
[1][0], horizontalLobes
[1][1],
678 verticalLobes
[1][0], verticalLobes
[1][1], integralImage
,
679 integralImageStride
);
680 BoxBlur_NEON(aData
, horizontalLobes
[2][0], horizontalLobes
[2][1],
681 verticalLobes
[2][0], verticalLobes
[2][1], integralImage
,
682 integralImageStride
);
686 #ifdef _MIPS_ARCH_LOONGSON3A
687 BoxBlur_LS3(aData
, horizontalLobes
[0][0], horizontalLobes
[0][1],
688 verticalLobes
[0][0], verticalLobes
[0][1], integralImage
,
689 integralImageStride
);
690 BoxBlur_LS3(aData
, horizontalLobes
[1][0], horizontalLobes
[1][1],
691 verticalLobes
[1][0], verticalLobes
[1][1], integralImage
,
692 integralImageStride
);
693 BoxBlur_LS3(aData
, horizontalLobes
[2][0], horizontalLobes
[2][1],
694 verticalLobes
[2][0], verticalLobes
[2][1], integralImage
,
695 integralImageStride
);
697 BoxBlur_C(aData
, horizontalLobes
[0][0], horizontalLobes
[0][1],
698 verticalLobes
[0][0], verticalLobes
[0][1], integralImage
,
699 integralImageStride
);
700 BoxBlur_C(aData
, horizontalLobes
[1][0], horizontalLobes
[1][1],
701 verticalLobes
[1][0], verticalLobes
[1][1], integralImage
,
702 integralImageStride
);
703 BoxBlur_C(aData
, horizontalLobes
[2][0], horizontalLobes
[2][1],
704 verticalLobes
[2][0], verticalLobes
[2][1], integralImage
,
705 integralImageStride
);
712 MOZ_ALWAYS_INLINE
void GenerateIntegralRow(uint32_t* aDest
,
713 const uint8_t* aSource
,
714 uint32_t* aPreviousRow
,
715 const uint32_t& aSourceWidth
,
716 const uint32_t& aLeftInflation
,
717 const uint32_t& aRightInflation
) {
718 uint32_t currentRowSum
= 0;
719 uint32_t pixel
= aSource
[0];
720 for (uint32_t x
= 0; x
< aLeftInflation
; x
++) {
721 currentRowSum
+= pixel
;
722 *aDest
++ = currentRowSum
+ *aPreviousRow
++;
724 for (uint32_t x
= aLeftInflation
; x
< (aSourceWidth
+ aLeftInflation
);
726 uint32_t alphaValues
= *(uint32_t*)(aSource
+ (x
- aLeftInflation
));
727 #if defined WORDS_BIGENDIAN || defined IS_BIG_ENDIAN || defined __BIG_ENDIAN__
728 currentRowSum
+= (alphaValues
>> 24) & 0xff;
729 *aDest
++ = *aPreviousRow
++ + currentRowSum
;
730 currentRowSum
+= (alphaValues
>> 16) & 0xff;
731 *aDest
++ = *aPreviousRow
++ + currentRowSum
;
732 currentRowSum
+= (alphaValues
>> 8) & 0xff;
733 *aDest
++ = *aPreviousRow
++ + currentRowSum
;
734 currentRowSum
+= alphaValues
& 0xff;
735 *aDest
++ = *aPreviousRow
++ + currentRowSum
;
737 currentRowSum
+= alphaValues
& 0xff;
738 *aDest
++ = *aPreviousRow
++ + currentRowSum
;
740 currentRowSum
+= alphaValues
& 0xff;
741 *aDest
++ = *aPreviousRow
++ + currentRowSum
;
743 currentRowSum
+= alphaValues
& 0xff;
744 *aDest
++ = *aPreviousRow
++ + currentRowSum
;
746 currentRowSum
+= alphaValues
& 0xff;
747 *aDest
++ = *aPreviousRow
++ + currentRowSum
;
750 pixel
= aSource
[aSourceWidth
- 1];
751 for (uint32_t x
= (aSourceWidth
+ aLeftInflation
);
752 x
< (aSourceWidth
+ aLeftInflation
+ aRightInflation
); x
++) {
753 currentRowSum
+= pixel
;
754 *aDest
++ = currentRowSum
+ *aPreviousRow
++;
758 MOZ_ALWAYS_INLINE
void GenerateIntegralImage_C(
759 int32_t aLeftInflation
, int32_t aRightInflation
, int32_t aTopInflation
,
760 int32_t aBottomInflation
, uint32_t* aIntegralImage
,
761 size_t aIntegralImageStride
, uint8_t* aSource
, int32_t aSourceStride
,
762 const IntSize
& aSize
) {
763 uint32_t stride32bit
= aIntegralImageStride
/ 4;
765 IntSize
integralImageSize(aSize
.width
+ aLeftInflation
+ aRightInflation
,
766 aSize
.height
+ aTopInflation
+ aBottomInflation
);
768 memset(aIntegralImage
, 0, aIntegralImageStride
);
770 GenerateIntegralRow(aIntegralImage
, aSource
, aIntegralImage
, aSize
.width
,
771 aLeftInflation
, aRightInflation
);
772 for (int y
= 1; y
< aTopInflation
+ 1; y
++) {
773 GenerateIntegralRow(aIntegralImage
+ (y
* stride32bit
), aSource
,
774 aIntegralImage
+ (y
- 1) * stride32bit
, aSize
.width
,
775 aLeftInflation
, aRightInflation
);
778 for (int y
= aTopInflation
+ 1; y
< (aSize
.height
+ aTopInflation
); y
++) {
779 GenerateIntegralRow(aIntegralImage
+ (y
* stride32bit
),
780 aSource
+ aSourceStride
* (y
- aTopInflation
),
781 aIntegralImage
+ (y
- 1) * stride32bit
, aSize
.width
,
782 aLeftInflation
, aRightInflation
);
785 if (aBottomInflation
) {
786 for (int y
= (aSize
.height
+ aTopInflation
); y
< integralImageSize
.height
;
788 GenerateIntegralRow(aIntegralImage
+ (y
* stride32bit
),
789 aSource
+ ((aSize
.height
- 1) * aSourceStride
),
790 aIntegralImage
+ (y
- 1) * stride32bit
, aSize
.width
,
791 aLeftInflation
, aRightInflation
);
797 * Attempt to do an in-place box blur using an integral image.
799 void AlphaBoxBlur::BoxBlur_C(uint8_t* aData
, int32_t aLeftLobe
,
800 int32_t aRightLobe
, int32_t aTopLobe
,
801 int32_t aBottomLobe
, uint32_t* aIntegralImage
,
802 size_t aIntegralImageStride
) const {
803 IntSize size
= GetSize();
805 MOZ_ASSERT(size
.width
> 0);
807 // Our 'left' or 'top' lobe will include the current pixel. i.e. when
808 // looking at an integral image the value of a pixel at 'x,y' is calculated
809 // using the value of the integral image values above/below that.
812 int32_t boxSize
= (aLeftLobe
+ aRightLobe
) * (aTopLobe
+ aBottomLobe
);
814 MOZ_ASSERT(boxSize
> 0);
820 int32_t stride32bit
= aIntegralImageStride
/ 4;
822 int32_t leftInflation
= RoundUpToMultipleOf4(aLeftLobe
).value();
824 GenerateIntegralImage_C(leftInflation
, aRightLobe
, aTopLobe
, aBottomLobe
,
825 aIntegralImage
, aIntegralImageStride
, aData
, mStride
,
828 uint32_t reciprocal
= uint32_t((uint64_t(1) << 32) / boxSize
);
830 uint32_t* innerIntegral
=
831 aIntegralImage
+ (aTopLobe
* stride32bit
) + leftInflation
;
833 // Storing these locally makes this about 30% faster! Presumably the compiler
834 // can't be sure we're not altering the member variables in this loop.
835 IntRect skipRect
= mSkipRect
;
836 uint8_t* data
= aData
;
837 int32_t stride
= mStride
;
838 for (int32_t y
= 0; y
< size
.height
; y
++) {
839 // Not using ContainsY(y) because we do not skip y == skipRect.Y()
840 // although that may not be done on purpose
841 bool inSkipRectY
= y
> skipRect
.Y() && y
< skipRect
.YMost();
843 uint32_t* topLeftBase
=
844 innerIntegral
+ ((y
- aTopLobe
) * stride32bit
- aLeftLobe
);
845 uint32_t* topRightBase
=
846 innerIntegral
+ ((y
- aTopLobe
) * stride32bit
+ aRightLobe
);
847 uint32_t* bottomRightBase
=
848 innerIntegral
+ ((y
+ aBottomLobe
) * stride32bit
+ aRightLobe
);
849 uint32_t* bottomLeftBase
=
850 innerIntegral
+ ((y
+ aBottomLobe
) * stride32bit
- aLeftLobe
);
852 for (int32_t x
= 0; x
< size
.width
; x
++) {
853 // Not using ContainsX(x) because we do not skip x == skipRect.X()
854 // although that may not be done on purpose
855 if (inSkipRectY
&& x
> skipRect
.X() && x
< skipRect
.XMost()) {
856 x
= skipRect
.XMost() - 1;
857 // Trigger early jump on coming loop iterations, this will be reset
862 int32_t topLeft
= topLeftBase
[x
];
863 int32_t topRight
= topRightBase
[x
];
864 int32_t bottomRight
= bottomRightBase
[x
];
865 int32_t bottomLeft
= bottomLeftBase
[x
];
867 uint32_t value
= bottomRight
- topRight
- bottomLeft
;
870 data
[stride
* y
+ x
] =
871 (uint64_t(reciprocal
) * value
+ (uint64_t(1) << 31)) >> 32;
877 * Compute the box blur size (which we're calling the blur radius) from
878 * the standard deviation.
880 * Much of this, the 3 * sqrt(2 * pi) / 4, is the known value for
881 * approximating a Gaussian using box blurs. This yields quite a good
882 * approximation for a Gaussian. Then we multiply this by 1.5 since our
883 * code wants the radius of the entire triple-box-blur kernel instead of
884 * the diameter of an individual box blur. For more details, see:
885 * http://www.w3.org/TR/SVG11/filters.html#feGaussianBlurElement
886 * https://bugzilla.mozilla.org/show_bug.cgi?id=590039#c19
888 static const Float GAUSSIAN_SCALE_FACTOR
=
889 Float((3 * sqrt(2 * M_PI
) / 4) * 1.5);
// Converts a per-axis standard deviation (aStd.x, aStd.y) into integer blur
// radii. Each component is multiplied by GAUSSIAN_SCALE_FACTOR and rounded via
// floor(v + 0.5f) before being cast to int32_t; x is computed first, then y.
// NOTE(review): the lines that construct and return the resulting IntSize are
// not visible in this view — presumably the two values become its width and
// height, in that order; confirm against the full file.
891 IntSize
AlphaBoxBlur::CalculateBlurRadius(const Point
& aStd
) {
893 static_cast<int32_t>(floor(aStd
.x
* GAUSSIAN_SCALE_FACTOR
+ 0.5f
)),
894 static_cast<int32_t>(floor(aStd
.y
* GAUSSIAN_SCALE_FACTOR
+ 0.5f
)));
// Converts an integer blur radius back into a standard deviation by dividing
// it by GAUSSIAN_SCALE_FACTOR. This undoes the scaling applied in
// CalculateBlurRadius(), but not its rounding, so a round trip need not be
// exact.
899 Float
AlphaBoxBlur::CalculateBlurSigma(int32_t aBlurRadius
) {
900 return aBlurRadius
/ GAUSSIAN_SCALE_FACTOR
;
904 } // namespace mozilla