gfx/2d/Blur.cpp

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 #include "Blur.h"
   8
   9 #include <algorithm>
  10 #include <math.h>
  11 #include <string.h>
  12
  13 #include "mozilla/CheckedInt.h"
  14 #include "mozilla/Constants.h"
  15
  16 #include "2D.h"
  17 #include "DataSurfaceHelpers.h"
  18 #include "Tools.h"
  19
  20 using namespace std;
  21
  22 namespace mozilla {
  23 namespace gfx {
  24
  25 /**
  26  * Box blur involves looking at one pixel, and setting its value to the average
  27  * of its neighbouring pixels.
  28  * @param aInput The input buffer.
  29  * @param aOutput The output buffer.
  30  * @param aLeftLobe The number of pixels to blend on the left.
  31  * @param aRightLobe The number of pixels to blend on the right.
  32  * @param aWidth The number of columns in the buffers.
  33  * @param aRows The number of rows in the buffers.
  34  * @param aSkipRect An area to skip blurring in.
  35  * XXX shouldn't we pass stride in separately here?
  36  */
  37 static void
  38 BoxBlurHorizontal(unsigned char* aInput,
  39                   unsigned char* aOutput,
  40                   int32_t aLeftLobe,
  41                   int32_t aRightLobe,
  42                   int32_t aWidth,
  43                   int32_t aRows,
  44                   const IntRect& aSkipRect)
  45 {
  46     MOZ_ASSERT(aWidth > 0);
  47
  48     int32_t boxSize = aLeftLobe + aRightLobe + 1;
  49     bool skipRectCoversWholeRow = 0 >= aSkipRect.x &&
  50                                   aWidth <= aSkipRect.XMost();
  51     if (boxSize == 1) {
  52         memcpy(aOutput, aInput, aWidth*aRows);
  53         return;
  54     }
  55     uint32_t reciprocal = uint32_t((uint64_t(1) << 32) / boxSize);
  56
  57     for (int32_t y = 0; y < aRows; y++) {
  58         // Check whether the skip rect intersects this row. If the skip
  59         // rect covers the whole surface in this row, we can avoid
  60         // this row entirely (and any others along the skip rect).
  61         bool inSkipRectY = y >= aSkipRect.y &&
  62                            y < aSkipRect.YMost();
  63         if (inSkipRectY && skipRectCoversWholeRow) {
  64             y = aSkipRect.YMost() - 1;
  65             continue;
  66         }
  67
  68         uint32_t alphaSum = 0;
  69         for (int32_t i = 0; i < boxSize; i++) {
  70             int32_t pos = i - aLeftLobe;
  71             // See assertion above; if aWidth is zero, then we would have no
  72             // valid position to clamp to.
  73             pos = max(pos, 0);
  74             pos = min(pos, aWidth - 1);
  75             alphaSum += aInput[aWidth * y + pos];
  76         }
  77         for (int32_t x = 0; x < aWidth; x++) {
  78             // Check whether we are within the skip rect. If so, go
  79             // to the next point outside the skip rect.
  80             if (inSkipRectY && x >= aSkipRect.x &&
  81                 x < aSkipRect.XMost()) {
  82                 x = aSkipRect.XMost();
  83                 if (x >= aWidth)
  84                     break;
  85
  86                 // Recalculate the neighbouring alpha values for
  87                 // our new point on the surface.
  88                 alphaSum = 0;
  89                 for (int32_t i = 0; i < boxSize; i++) {
  90                     int32_t pos = x + i - aLeftLobe;
  91                     // See assertion above; if aWidth is zero, then we would have no
  92                     // valid position to clamp to.
  93                     pos = max(pos, 0);
  94                     pos = min(pos, aWidth - 1);
  95                     alphaSum += aInput[aWidth * y + pos];
  96                 }
  97             }
  98             int32_t tmp = x - aLeftLobe;
  99             int32_t last = max(tmp, 0);
 100             int32_t next = min(tmp + boxSize, aWidth - 1);
 101
 102             aOutput[aWidth * y + x] = (uint64_t(alphaSum) * reciprocal) >> 32;
 103
 104             alphaSum += aInput[aWidth * y + next] -
 105                         aInput[aWidth * y + last];
 106         }
 107     }
 108 }
 109
 110 /**
 111  * Identical to BoxBlurHorizontal, except it blurs top and bottom instead of
 112  * left and right.
 113  * XXX shouldn't we pass stride in separately here?
 114  */
 115 static void
 116 BoxBlurVertical(unsigned char* aInput,
 117                 unsigned char* aOutput,
 118                 int32_t aTopLobe,
 119                 int32_t aBottomLobe,
 120                 int32_t aWidth,
 121                 int32_t aRows,
 122                 const IntRect& aSkipRect)
 123 {
 124     MOZ_ASSERT(aRows > 0);
 125
 126     int32_t boxSize = aTopLobe + aBottomLobe + 1;
 127     bool skipRectCoversWholeColumn = 0 >= aSkipRect.y &&
 128                                      aRows <= aSkipRect.YMost();
 129     if (boxSize == 1) {
 130         memcpy(aOutput, aInput, aWidth*aRows);
 131         return;
 132     }
 133     uint32_t reciprocal = uint32_t((uint64_t(1) << 32) / boxSize);
 134
 135     for (int32_t x = 0; x < aWidth; x++) {
 136         bool inSkipRectX = x >= aSkipRect.x &&
 137                            x < aSkipRect.XMost();
 138         if (inSkipRectX && skipRectCoversWholeColumn) {
 139             x = aSkipRect.XMost() - 1;
 140             continue;
 141         }
 142
 143         uint32_t alphaSum = 0;
 144         for (int32_t i = 0; i < boxSize; i++) {
 145             int32_t pos = i - aTopLobe;
 146             // See assertion above; if aRows is zero, then we would have no
 147             // valid position to clamp to.
 148             pos = max(pos, 0);
 149             pos = min(pos, aRows - 1);
 150             alphaSum += aInput[aWidth * pos + x];
 151         }
 152         for (int32_t y = 0; y < aRows; y++) {
 153             if (inSkipRectX && y >= aSkipRect.y &&
 154                 y < aSkipRect.YMost()) {
 155                 y = aSkipRect.YMost();
 156                 if (y >= aRows)
 157                     break;
 158
 159                 alphaSum = 0;
 160                 for (int32_t i = 0; i < boxSize; i++) {
 161                     int32_t pos = y + i - aTopLobe;
 162                     // See assertion above; if aRows is zero, then we would have no
 163                     // valid position to clamp to.
 164                     pos = max(pos, 0);
 165                     pos = min(pos, aRows - 1);
 166                     alphaSum += aInput[aWidth * pos + x];
 167                 }
 168             }
 169             int32_t tmp = y - aTopLobe;
 170             int32_t last = max(tmp, 0);
 171             int32_t next = min(tmp + boxSize, aRows - 1);
 172
 173             aOutput[aWidth * y + x] = (uint64_t(alphaSum) * reciprocal) >> 32;
 174
 175             alphaSum += aInput[aWidth * next + x] -
 176                         aInput[aWidth * last + x];
 177         }
 178     }
 179 }
 180
 181 static void ComputeLobes(int32_t aRadius, int32_t aLobes[3][2])
 182 {
 183     int32_t major, minor, final;
 184
 185     /* See http://www.w3.org/TR/SVG/filters.html#feGaussianBlur for
 186      * some notes about approximating the Gaussian blur with box-blurs.
 187      * The comments below are in the terminology of that page.
 188      */
 189     int32_t z = aRadius / 3;
 190     switch (aRadius % 3) {
 191     case 0:
 192         // aRadius = z*3; choose d = 2*z + 1
 193         major = minor = final = z;
 194         break;
 195     case 1:
 196         // aRadius = z*3 + 1
 197         // This is a tricky case since there is no value of d which will
 198         // yield a radius of exactly aRadius. If d is odd, i.e. d=2*k + 1
 199         // for some integer k, then the radius will be 3*k. If d is even,
 200         // i.e. d=2*k, then the radius will be 3*k - 1.
 201         // So we have to choose values that don't match the standard
 202         // algorithm.
 203         major = z + 1;
 204         minor = final = z;
 205         break;
 206     case 2:
 207         // aRadius = z*3 + 2; choose d = 2*z + 2
 208         major = final = z + 1;
 209         minor = z;
 210         break;
 211     default:
 212         // Mathematical impossibility!
 213         MOZ_ASSERT(false);
 214         major = minor = final = 0;
 215     }
 216     MOZ_ASSERT(major + minor + final == aRadius);
 217
 218     aLobes[0][0] = major;
 219     aLobes[0][1] = minor;
 220     aLobes[1][0] = minor;
 221     aLobes[1][1] = major;
 222     aLobes[2][0] = final;
 223     aLobes[2][1] = final;
 224 }
 225
 226 static void
 227 SpreadHorizontal(unsigned char* aInput,
 228                  unsigned char* aOutput,
 229                  int32_t aRadius,
 230                  int32_t aWidth,
 231                  int32_t aRows,
 232                  int32_t aStride,
 233                  const IntRect& aSkipRect)
 234 {
 235     if (aRadius == 0) {
 236         memcpy(aOutput, aInput, aStride * aRows);
 237         return;
 238     }
 239
 240     bool skipRectCoversWholeRow = 0 >= aSkipRect.x &&
 241                                     aWidth <= aSkipRect.XMost();
 242     for (int32_t y = 0; y < aRows; y++) {
 243         // Check whether the skip rect intersects this row. If the skip
 244         // rect covers the whole surface in this row, we can avoid
 245         // this row entirely (and any others along the skip rect).
 246         bool inSkipRectY = y >= aSkipRect.y &&
 247                              y < aSkipRect.YMost();
 248         if (inSkipRectY && skipRectCoversWholeRow) {
 249             y = aSkipRect.YMost() - 1;
 250             continue;
 251         }
 252
 253         for (int32_t x = 0; x < aWidth; x++) {
 254             // Check whether we are within the skip rect. If so, go
 255             // to the next point outside the skip rect.
 256             if (inSkipRectY && x >= aSkipRect.x &&
 257                 x < aSkipRect.XMost()) {
 258                 x = aSkipRect.XMost();
 259                 if (x >= aWidth)
 260                     break;
 261             }
 262
 263             int32_t sMin = max(x - aRadius, 0);
 264             int32_t sMax = min(x + aRadius, aWidth - 1);
 265             int32_t v = 0;
 266             for (int32_t s = sMin; s <= sMax; ++s) {
 267                 v = max<int32_t>(v, aInput[aStride * y + s]);
 268             }
 269             aOutput[aStride * y + x] = v;
 270         }
 271     }
 272 }
 273
 274 static void
 275 SpreadVertical(unsigned char* aInput,
 276                unsigned char* aOutput,
 277                int32_t aRadius,
 278                int32_t aWidth,
 279                int32_t aRows,
 280                int32_t aStride,
 281                const IntRect& aSkipRect)
 282 {
 283     if (aRadius == 0) {
 284         memcpy(aOutput, aInput, aStride * aRows);
 285         return;
 286     }
 287
 288     bool skipRectCoversWholeColumn = 0 >= aSkipRect.y &&
 289                                      aRows <= aSkipRect.YMost();
 290     for (int32_t x = 0; x < aWidth; x++) {
 291         bool inSkipRectX = x >= aSkipRect.x &&
 292                            x < aSkipRect.XMost();
 293         if (inSkipRectX && skipRectCoversWholeColumn) {
 294             x = aSkipRect.XMost() - 1;
 295             continue;
 296         }
 297
 298         for (int32_t y = 0; y < aRows; y++) {
 299             // Check whether we are within the skip rect. If so, go
 300             // to the next point outside the skip rect.
 301             if (inSkipRectX && y >= aSkipRect.y &&
 302                 y < aSkipRect.YMost()) {
 303                 y = aSkipRect.YMost();
 304                 if (y >= aRows)
 305                     break;
 306             }
 307
 308             int32_t sMin = max(y - aRadius, 0);
 309             int32_t sMax = min(y + aRadius, aRows - 1);
 310             int32_t v = 0;
 311             for (int32_t s = sMin; s <= sMax; ++s) {
 312                 v = max<int32_t>(v, aInput[aStride * s + x]);
 313             }
 314             aOutput[aStride * y + x] = v;
 315         }
 316     }
 317 }
 318
 319 CheckedInt<int32_t>
 320 AlphaBoxBlur::RoundUpToMultipleOf4(int32_t aVal)
 321 {
 322   CheckedInt<int32_t> val(aVal);
 323
 324   val += 3;
 325   val /= 4;
 326   val *= 4;
 327
 328   return val;
 329 }
 330
 331 AlphaBoxBlur::AlphaBoxBlur(const Rect& aRect,
 332                            const IntSize& aSpreadRadius,
 333                            const IntSize& aBlurRadius,
 334                            const Rect* aDirtyRect,
 335                            const Rect* aSkipRect)
 336  : mSpreadRadius(aSpreadRadius),
 337    mBlurRadius(aBlurRadius),
 338    mSurfaceAllocationSize(0)
 339 {
 340   Rect rect(aRect);
 341   rect.Inflate(Size(aBlurRadius + aSpreadRadius));
 342   rect.RoundOut();
 343
 344   if (aDirtyRect) {
 345     // If we get passed a dirty rect from layout, we can minimize the
 346     // shadow size and make painting faster.
 347     mHasDirtyRect = true;
 348     mDirtyRect = *aDirtyRect;
 349     Rect requiredBlurArea = mDirtyRect.Intersect(rect);
 350     requiredBlurArea.Inflate(Size(aBlurRadius + aSpreadRadius));
 351     rect = requiredBlurArea.Intersect(rect);
 352   } else {
 353     mHasDirtyRect = false;
 354   }
 355
 356   mRect = IntRect(int32_t(rect.x), int32_t(rect.y),
 357                   int32_t(rect.width), int32_t(rect.height));
 358   if (mRect.IsEmpty()) {
 359     return;
 360   }
 361
 362   if (aSkipRect) {
 363     // If we get passed a skip rect, we can lower the amount of
 364     // blurring/spreading we need to do. We convert it to IntRect to avoid
 365     // expensive int<->float conversions if we were to use Rect instead.
 366     Rect skipRect = *aSkipRect;
 367     skipRect.RoundIn();
 368     skipRect.Deflate(Size(aBlurRadius + aSpreadRadius));
 369     mSkipRect = IntRect(int32_t(skipRect.x), int32_t(skipRect.y),
 370                         int32_t(skipRect.width), int32_t(skipRect.height));
 371
 372     mSkipRect = mSkipRect.Intersect(mRect);
 373     if (mSkipRect.IsEqualInterior(mRect))
 374       return;
 375
 376     mSkipRect -= mRect.TopLeft();
 377   } else {
 378     mSkipRect = IntRect(0, 0, 0, 0);
 379   }
 380
 381   CheckedInt<int32_t> stride = RoundUpToMultipleOf4(mRect.width);
 382   if (stride.isValid()) {
 383     mStride = stride.value();
 384
 385     // We need to leave room for an additional 3 bytes for a potential overrun
 386     // in our blurring code.
 387     size_t size = BufferSizeFromStrideAndHeight(mStride, mRect.height, 3);
 388     if (size != 0) {
 389       mSurfaceAllocationSize = size;
 390     }
 391   }
 392 }
 393
 394 AlphaBoxBlur::AlphaBoxBlur(const Rect& aRect,
 395                            int32_t aStride,
 396                            float aSigmaX,
 397                            float aSigmaY)
 398   : mRect(int32_t(aRect.x), int32_t(aRect.y),
 399           int32_t(aRect.width), int32_t(aRect.height)),
 400     mSpreadRadius(),
 401     mBlurRadius(CalculateBlurRadius(Point(aSigmaX, aSigmaY))),
 402     mStride(aStride),
 403     mSurfaceAllocationSize(0)
 404 {
 405   IntRect intRect;
 406   if (aRect.ToIntRect(&intRect)) {
 407     size_t minDataSize = BufferSizeFromStrideAndHeight(intRect.width, intRect.height);
 408     if (minDataSize != 0) {
 409       mSurfaceAllocationSize = minDataSize;
 410     }
 411   }
 412 }
 413
 414
 415 AlphaBoxBlur::~AlphaBoxBlur()
 416 {
 417 }
 418
 419 IntSize
 420 AlphaBoxBlur::GetSize()
 421 {
 422   IntSize size(mRect.width, mRect.height);
 423   return size;
 424 }
 425
 426 int32_t
 427 AlphaBoxBlur::GetStride()
 428 {
 429   return mStride;
 430 }
 431
 432 IntRect
 433 AlphaBoxBlur::GetRect()
 434 {
 435   return mRect;
 436 }
 437
 438 Rect*
 439 AlphaBoxBlur::GetDirtyRect()
 440 {
 441   if (mHasDirtyRect) {
 442     return &mDirtyRect;
 443   }
 444
 445   return nullptr;
 446 }
 447
 448 size_t
 449 AlphaBoxBlur::GetSurfaceAllocationSize() const
 450 {
 451   return mSurfaceAllocationSize;
 452 }
 453
 454 void
 455 AlphaBoxBlur::Blur(uint8_t* aData)
 456 {
 457   if (!aData) {
 458     return;
 459   }
 460
 461   // no need to do all this if not blurring or spreading
 462   if (mBlurRadius != IntSize(0,0) || mSpreadRadius != IntSize(0,0)) {
 463     int32_t stride = GetStride();
 464
 465     IntSize size = GetSize();
 466
 467     if (mSpreadRadius.width > 0 || mSpreadRadius.height > 0) {
 468       // No need to use CheckedInt here - we have validated it in the constructor.
 469       size_t szB = stride * size.height;
 470       unsigned char* tmpData = new (std::nothrow) uint8_t[szB];
 471
 472       if (!tmpData) {
 473         return;
 474       }
 475
 476       memset(tmpData, 0, szB);
 477
 478       SpreadHorizontal(aData, tmpData, mSpreadRadius.width, GetSize().width, GetSize().height, stride, mSkipRect);
 479       SpreadVertical(tmpData, aData, mSpreadRadius.height, GetSize().width, GetSize().height, stride, mSkipRect);
 480
 481       delete [] tmpData;
 482     }
 483
 484     int32_t horizontalLobes[3][2];
 485     ComputeLobes(mBlurRadius.width, horizontalLobes);
 486     int32_t verticalLobes[3][2];
 487     ComputeLobes(mBlurRadius.height, verticalLobes);
 488
 489     // We want to allow for some extra space on the left for alignment reasons.
 490     int32_t maxLeftLobe = RoundUpToMultipleOf4(horizontalLobes[0][0] + 1).value();
 491
 492     IntSize integralImageSize(size.width + maxLeftLobe + horizontalLobes[1][1],
 493                               size.height + verticalLobes[0][0] + verticalLobes[1][1] + 1);
 494
 495     if ((integralImageSize.width * integralImageSize.height) > (1 << 24)) {
 496       // Fallback to old blurring code when the surface is so large it may
 497       // overflow our integral image!
 498
 499       // No need to use CheckedInt here - we have validated it in the constructor.
 500       size_t szB = stride * size.height;
 501       uint8_t* tmpData = new (std::nothrow) uint8_t[szB];
 502       if (!tmpData) {
 503         return;
 504       }
 505
 506       memset(tmpData, 0, szB);
 507
 508       uint8_t* a = aData;
 509       uint8_t* b = tmpData;
 510       if (mBlurRadius.width > 0) {
 511         BoxBlurHorizontal(a, b, horizontalLobes[0][0], horizontalLobes[0][1], stride, GetSize().height, mSkipRect);
 512         BoxBlurHorizontal(b, a, horizontalLobes[1][0], horizontalLobes[1][1], stride, GetSize().height, mSkipRect);
 513         BoxBlurHorizontal(a, b, horizontalLobes[2][0], horizontalLobes[2][1], stride, GetSize().height, mSkipRect);
 514       } else {
 515         a = tmpData;
 516         b = aData;
 517       }
 518       // The result is in 'b' here.
 519       if (mBlurRadius.height > 0) {
 520         BoxBlurVertical(b, a, verticalLobes[0][0], verticalLobes[0][1], stride, GetSize().height, mSkipRect);
 521         BoxBlurVertical(a, b, verticalLobes[1][0], verticalLobes[1][1], stride, GetSize().height, mSkipRect);
 522         BoxBlurVertical(b, a, verticalLobes[2][0], verticalLobes[2][1], stride, GetSize().height, mSkipRect);
 523       } else {
 524         a = b;
 525       }
 526       // The result is in 'a' here.
 527       if (a == tmpData) {
 528         memcpy(aData, tmpData, szB);
 529       }
 530       delete [] tmpData;
 531     } else {
 532       size_t integralImageStride = GetAlignedStride<16>(integralImageSize.width * 4);
 533
 534       // We need to leave room for an additional 12 bytes for a maximum overrun
 535       // of 3 pixels in the blurring code.
 536       size_t bufLen = BufferSizeFromStrideAndHeight(integralImageStride, integralImageSize.height, 12);
 537       if (bufLen == 0) {
 538         return;
 539       }
 540       // bufLen is a byte count, but here we want a multiple of 32-bit ints, so
 541       // we divide by 4.
 542       AlignedArray<uint32_t> integralImage((bufLen / 4) + ((bufLen % 4) ? 1 : 0));
 543
 544       if (!integralImage) {
 545         return;
 546       }
 547 #ifdef USE_SSE2
 548       if (Factory::HasSSE2()) {
 549         BoxBlur_SSE2(aData, horizontalLobes[0][0], horizontalLobes[0][1], verticalLobes[0][0],
 550                      verticalLobes[0][1], integralImage, integralImageStride);
 551         BoxBlur_SSE2(aData, horizontalLobes[1][0], horizontalLobes[1][1], verticalLobes[1][0],
 552                      verticalLobes[1][1], integralImage, integralImageStride);
 553         BoxBlur_SSE2(aData, horizontalLobes[2][0], horizontalLobes[2][1], verticalLobes[2][0],
 554                      verticalLobes[2][1], integralImage, integralImageStride);
 555       } else
 556 #endif
 557       {
 558         BoxBlur_C(aData, horizontalLobes[0][0], horizontalLobes[0][1], verticalLobes[0][0],
 559                   verticalLobes[0][1], integralImage, integralImageStride);
 560         BoxBlur_C(aData, horizontalLobes[1][0], horizontalLobes[1][1], verticalLobes[1][0],
 561                   verticalLobes[1][1], integralImage, integralImageStride);
 562         BoxBlur_C(aData, horizontalLobes[2][0], horizontalLobes[2][1], verticalLobes[2][0],
 563                   verticalLobes[2][1], integralImage, integralImageStride);
 564       }
 565     }
 566   }
 567 }
 568
 569 MOZ_ALWAYS_INLINE void
 570 GenerateIntegralRow(uint32_t  *aDest, const uint8_t *aSource, uint32_t *aPreviousRow,
 571                     const uint32_t &aSourceWidth, const uint32_t &aLeftInflation, const uint32_t &aRightInflation)
 572 {
 573   uint32_t currentRowSum = 0;
 574   uint32_t pixel = aSource[0];
 575   for (uint32_t x = 0; x < aLeftInflation; x++) {
 576     currentRowSum += pixel;
 577     *aDest++ = currentRowSum + *aPreviousRow++;
 578   }
 579   for (uint32_t x = aLeftInflation; x < (aSourceWidth + aLeftInflation); x += 4) {
 580       uint32_t alphaValues = *(uint32_t*)(aSource + (x - aLeftInflation));
 581 #if defined WORDS_BIGENDIAN || defined IS_BIG_ENDIAN || defined __BIG_ENDIAN__
 582       currentRowSum += (alphaValues >> 24) & 0xff;
 583       *aDest++ = *aPreviousRow++ + currentRowSum;
 584       currentRowSum += (alphaValues >> 16) & 0xff;
 585       *aDest++ = *aPreviousRow++ + currentRowSum;
 586       currentRowSum += (alphaValues >> 8) & 0xff;
 587       *aDest++ = *aPreviousRow++ + currentRowSum;
 588       currentRowSum += alphaValues & 0xff;
 589       *aDest++ = *aPreviousRow++ + currentRowSum;
 590 #else
 591       currentRowSum += alphaValues & 0xff;
 592       *aDest++ = *aPreviousRow++ + currentRowSum;
 593       alphaValues >>= 8;
 594       currentRowSum += alphaValues & 0xff;
 595       *aDest++ = *aPreviousRow++ + currentRowSum;
 596       alphaValues >>= 8;
 597       currentRowSum += alphaValues & 0xff;
 598       *aDest++ = *aPreviousRow++ + currentRowSum;
 599       alphaValues >>= 8;
 600       currentRowSum += alphaValues & 0xff;
 601       *aDest++ = *aPreviousRow++ + currentRowSum;
 602 #endif
 603   }
 604   pixel = aSource[aSourceWidth - 1];
 605   for (uint32_t x = (aSourceWidth + aLeftInflation); x < (aSourceWidth + aLeftInflation + aRightInflation); x++) {
 606     currentRowSum += pixel;
 607     *aDest++ = currentRowSum + *aPreviousRow++;
 608   }
 609 }
 610
 611 MOZ_ALWAYS_INLINE void
 612 GenerateIntegralImage_C(int32_t aLeftInflation, int32_t aRightInflation,
 613                         int32_t aTopInflation, int32_t aBottomInflation,
 614                         uint32_t *aIntegralImage, size_t aIntegralImageStride,
 615                         uint8_t *aSource, int32_t aSourceStride, const IntSize &aSize)
 616 {
 617   uint32_t stride32bit = aIntegralImageStride / 4;
 618
 619   IntSize integralImageSize(aSize.width + aLeftInflation + aRightInflation,
 620                             aSize.height + aTopInflation + aBottomInflation);
 621
 622   memset(aIntegralImage, 0, aIntegralImageStride);
 623
 624   GenerateIntegralRow(aIntegralImage, aSource, aIntegralImage,
 625                       aSize.width, aLeftInflation, aRightInflation);
 626   for (int y = 1; y < aTopInflation + 1; y++) {
 627     GenerateIntegralRow(aIntegralImage + (y * stride32bit), aSource, aIntegralImage + (y - 1) * stride32bit,
 628                         aSize.width, aLeftInflation, aRightInflation);
 629   }
 630
 631   for (int y = aTopInflation + 1; y < (aSize.height + aTopInflation); y++) {
 632     GenerateIntegralRow(aIntegralImage + (y * stride32bit), aSource + aSourceStride * (y - aTopInflation),
 633                         aIntegralImage + (y - 1) * stride32bit, aSize.width, aLeftInflation, aRightInflation);
 634   }
 635
 636   if (aBottomInflation) {
 637     for (int y = (aSize.height + aTopInflation); y < integralImageSize.height; y++) {
 638       GenerateIntegralRow(aIntegralImage + (y * stride32bit), aSource + ((aSize.height - 1) * aSourceStride),
 639                           aIntegralImage + (y - 1) * stride32bit,
 640                           aSize.width, aLeftInflation, aRightInflation);
 641     }
 642   }
 643 }
 644
 645 /**
 646  * Attempt to do an in-place box blur using an integral image.
 647  */
 648 void
 649 AlphaBoxBlur::BoxBlur_C(uint8_t* aData,
 650                         int32_t aLeftLobe,
 651                         int32_t aRightLobe,
 652                         int32_t aTopLobe,
 653                         int32_t aBottomLobe,
 654                         uint32_t *aIntegralImage,
 655                         size_t aIntegralImageStride)
 656 {
 657   IntSize size = GetSize();
 658
 659   MOZ_ASSERT(size.width > 0);
 660
 661   // Our 'left' or 'top' lobe will include the current pixel. i.e. when
 662   // looking at an integral image the value of a pixel at 'x,y' is calculated
 663   // using the value of the integral image values above/below that.
 664   aLeftLobe++;
 665   aTopLobe++;
 666   int32_t boxSize = (aLeftLobe + aRightLobe) * (aTopLobe + aBottomLobe);
 667
 668   MOZ_ASSERT(boxSize > 0);
 669
 670   if (boxSize == 1) {
 671       return;
 672   }
 673
 674   int32_t stride32bit = aIntegralImageStride / 4;
 675
 676   int32_t leftInflation = RoundUpToMultipleOf4(aLeftLobe).value();
 677
 678   GenerateIntegralImage_C(leftInflation, aRightLobe, aTopLobe, aBottomLobe,
 679                           aIntegralImage, aIntegralImageStride, aData,
 680                           mStride, size);
 681
 682   uint32_t reciprocal = uint32_t((uint64_t(1) << 32) / boxSize);
 683
 684   uint32_t *innerIntegral = aIntegralImage + (aTopLobe * stride32bit) + leftInflation;
 685
 686   // Storing these locally makes this about 30% faster! Presumably the compiler
 687   // can't be sure we're not altering the member variables in this loop.
 688   IntRect skipRect = mSkipRect;
 689   uint8_t *data = aData;
 690   int32_t stride = mStride;
 691   for (int32_t y = 0; y < size.height; y++) {
 692     bool inSkipRectY = y > skipRect.y && y < skipRect.YMost();
 693
 694     uint32_t *topLeftBase = innerIntegral + ((y - aTopLobe) * stride32bit - aLeftLobe);
 695     uint32_t *topRightBase = innerIntegral + ((y - aTopLobe) * stride32bit + aRightLobe);
 696     uint32_t *bottomRightBase = innerIntegral + ((y + aBottomLobe) * stride32bit + aRightLobe);
 697     uint32_t *bottomLeftBase = innerIntegral + ((y + aBottomLobe) * stride32bit - aLeftLobe);
 698
 699     for (int32_t x = 0; x < size.width; x++) {
 700       if (inSkipRectY && x > skipRect.x && x < skipRect.XMost()) {
 701         x = skipRect.XMost() - 1;
 702         // Trigger early jump on coming loop iterations, this will be reset
 703         // next line anyway.
 704         inSkipRectY = false;
 705         continue;
 706       }
 707       int32_t topLeft = topLeftBase[x];
 708       int32_t topRight = topRightBase[x];
 709       int32_t bottomRight = bottomRightBase[x];
 710       int32_t bottomLeft = bottomLeftBase[x];
 711
 712       uint32_t value = bottomRight - topRight - bottomLeft;
 713       value += topLeft;
 714
 715       data[stride * y + x] = (uint64_t(reciprocal) * value + (uint64_t(1) << 31)) >> 32;
 716     }
 717   }
 718 }
 719
 720 /**
 721  * Compute the box blur size (which we're calling the blur radius) from
 722  * the standard deviation.
 723  *
 724  * Much of this, the 3 * sqrt(2 * pi) / 4, is the known value for
 725  * approximating a Gaussian using box blurs.  This yields quite a good
 726  * approximation for a Gaussian.  Then we multiply this by 1.5 since our
 727  * code wants the radius of the entire triple-box-blur kernel instead of
 728  * the diameter of an individual box blur.  For more details, see:
 729  *   http://www.w3.org/TR/SVG11/filters.html#feGaussianBlurElement
 730  *   https://bugzilla.mozilla.org/show_bug.cgi?id=590039#c19
 731  */
 732 static const Float GAUSSIAN_SCALE_FACTOR = Float((3 * sqrt(2 * M_PI) / 4) * 1.5);
 733
 734 IntSize
 735 AlphaBoxBlur::CalculateBlurRadius(const Point& aStd)
 736 {
 737     IntSize size(static_cast<int32_t>(floor(aStd.x * GAUSSIAN_SCALE_FACTOR + 0.5f)),
 738                  static_cast<int32_t>(floor(aStd.y * GAUSSIAN_SCALE_FACTOR + 0.5f)));
 739
 740     return size;
 741 }
 742
 743 }
 744 }