1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
10 #include "mozilla/CheckedInt.h"
11 #include "mozilla/EndianUtils.h"
14 # include "mozilla/SSE.h"
18 # include "mozilla/arm.h"
25 * Convenience macros for dispatching to various format combinations.
28 // Hash the formats to a relatively dense value to optimize jump table
29 // generation. The first 6 formats in SurfaceFormat are the 32-bit BGRA variants
30 // and are the most common formats dispatched here. Room is reserved in the
31 // lowish bits for up to these 6 destination formats. If a destination format is
32 // >= 6, the 6th bit is set to avoid collisions.
33 #define FORMAT_KEY(aSrcFormat, aDstFormat) \
34 (int(aSrcFormat) * 6 + int(aDstFormat) + (int(int(aDstFormat) >= 6) << 6))
36 #define FORMAT_CASE_EXPR(aSrcFormat, aDstFormat, ...) \
37 case FORMAT_KEY(aSrcFormat, aDstFormat): \
41 #define FORMAT_CASE(aSrcFormat, aDstFormat, ...) \
42 FORMAT_CASE_EXPR(aSrcFormat, aDstFormat, FORMAT_CASE_CALL(__VA_ARGS__))
44 #define FORMAT_CASE_ROW(aSrcFormat, aDstFormat, ...) \
45 case FORMAT_KEY(aSrcFormat, aDstFormat): \
49 * Constexpr functions for analyzing format attributes in templates.
52 // Whether B comes before R in pixel memory layout.
53 static constexpr bool IsBGRFormat(SurfaceFormat aFormat
) {
54 return aFormat
== SurfaceFormat::B8G8R8A8
||
55 #if MOZ_LITTLE_ENDIAN()
56 aFormat
== SurfaceFormat::R5G6B5_UINT16
||
58 aFormat
== SurfaceFormat::B8G8R8X8
|| aFormat
== SurfaceFormat::B8G8R8
;
61 // Whether the order of B and R need to be swapped to map from src to dst.
62 static constexpr bool ShouldSwapRB(SurfaceFormat aSrcFormat
,
63 SurfaceFormat aDstFormat
) {
64 return IsBGRFormat(aSrcFormat
) != IsBGRFormat(aDstFormat
);
67 // The starting byte of the RGB components in pixel memory.
68 static constexpr uint32_t RGBByteIndex(SurfaceFormat aFormat
) {
69 return aFormat
== SurfaceFormat::A8R8G8B8
||
70 aFormat
== SurfaceFormat::X8R8G8B8
75 // The byte of the alpha component, which just comes after RGB.
76 static constexpr uint32_t AlphaByteIndex(SurfaceFormat aFormat
) {
77 return (RGBByteIndex(aFormat
) + 3) % 4;
80 // The endian-dependent bit shift to access RGB of a UINT32 pixel.
81 static constexpr uint32_t RGBBitShift(SurfaceFormat aFormat
) {
82 #if MOZ_LITTLE_ENDIAN()
83 return 8 * RGBByteIndex(aFormat
);
85 return 8 - 8 * RGBByteIndex(aFormat
);
89 // The endian-dependent bit shift to access alpha of a UINT32 pixel.
90 static constexpr uint32_t AlphaBitShift(SurfaceFormat aFormat
) {
91 return (RGBBitShift(aFormat
) + 24) % 32;
94 // Whether the pixel format should ignore the value of the alpha channel and
95 // treat it as opaque.
96 static constexpr bool IgnoreAlpha(SurfaceFormat aFormat
) {
97 return aFormat
== SurfaceFormat::B8G8R8X8
||
98 aFormat
== SurfaceFormat::R8G8B8X8
||
99 aFormat
== SurfaceFormat::X8R8G8B8
;
102 // Whether to force alpha to opaque to map from src to dst.
103 static constexpr bool ShouldForceOpaque(SurfaceFormat aSrcFormat
,
104 SurfaceFormat aDstFormat
) {
105 return IgnoreAlpha(aSrcFormat
) != IgnoreAlpha(aDstFormat
);
113 template <bool aSwapRB
, bool aOpaqueAlpha
>
114 void Premultiply_SSE2(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize
);
116 # define PREMULTIPLY_SSE2(aSrcFormat, aDstFormat) \
117 FORMAT_CASE(aSrcFormat, aDstFormat, \
118 Premultiply_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat), \
119 ShouldForceOpaque(aSrcFormat, aDstFormat)>)
121 template <bool aSwapRB
, bool aOpaqueAlpha
>
122 void PremultiplyRow_SSE2(const uint8_t*, uint8_t*, int32_t);
124 # define PREMULTIPLY_ROW_SSE2(aSrcFormat, aDstFormat) \
126 aSrcFormat, aDstFormat, \
127 PremultiplyRow_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat), \
128 ShouldForceOpaque(aSrcFormat, aDstFormat)>)
130 template <bool aSwapRB
>
131 void Unpremultiply_SSE2(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize
);
133 # define UNPREMULTIPLY_SSE2(aSrcFormat, aDstFormat) \
134 FORMAT_CASE(aSrcFormat, aDstFormat, \
135 Unpremultiply_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat)>)
137 template <bool aSwapRB
>
138 void UnpremultiplyRow_SSE2(const uint8_t*, uint8_t*, int32_t);
140 # define UNPREMULTIPLY_ROW_SSE2(aSrcFormat, aDstFormat) \
142 aSrcFormat, aDstFormat, \
143 UnpremultiplyRow_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat)>)
145 template <bool aSwapRB
, bool aOpaqueAlpha
>
146 void Swizzle_SSE2(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize
);
148 # define SWIZZLE_SSE2(aSrcFormat, aDstFormat) \
149 FORMAT_CASE(aSrcFormat, aDstFormat, \
150 Swizzle_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat), \
151 ShouldForceOpaque(aSrcFormat, aDstFormat)>)
153 template <bool aSwapRB
, bool aOpaqueAlpha
>
154 void SwizzleRow_SSE2(const uint8_t*, uint8_t*, int32_t);
156 # define SWIZZLE_ROW_SSE2(aSrcFormat, aDstFormat) \
158 aSrcFormat, aDstFormat, \
159 SwizzleRow_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat), \
160 ShouldForceOpaque(aSrcFormat, aDstFormat)>)
162 template <bool aSwapRB
>
163 void UnpackRowRGB24_SSSE3(const uint8_t*, uint8_t*, int32_t);
165 # define UNPACK_ROW_RGB_SSSE3(aDstFormat) \
167 SurfaceFormat::R8G8B8, aDstFormat, \
168 UnpackRowRGB24_SSSE3<ShouldSwapRB(SurfaceFormat::R8G8B8, aDstFormat)>)
170 template <bool aSwapRB
>
171 void UnpackRowRGB24_AVX2(const uint8_t*, uint8_t*, int32_t);
173 # define UNPACK_ROW_RGB_AVX2(aDstFormat) \
175 SurfaceFormat::R8G8B8, aDstFormat, \
176 UnpackRowRGB24_AVX2<ShouldSwapRB(SurfaceFormat::R8G8B8, aDstFormat)>)
182 * ARM NEON optimizations
185 template <bool aSwapRB
, bool aOpaqueAlpha
>
186 void Premultiply_NEON(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize
);
188 # define PREMULTIPLY_NEON(aSrcFormat, aDstFormat) \
189 FORMAT_CASE(aSrcFormat, aDstFormat, \
190 Premultiply_NEON<ShouldSwapRB(aSrcFormat, aDstFormat), \
191 ShouldForceOpaque(aSrcFormat, aDstFormat)>)
193 template <bool aSwapRB
, bool aOpaqueAlpha
>
194 void PremultiplyRow_NEON(const uint8_t*, uint8_t*, int32_t);
196 # define PREMULTIPLY_ROW_NEON(aSrcFormat, aDstFormat) \
198 aSrcFormat, aDstFormat, \
199 PremultiplyRow_NEON<ShouldSwapRB(aSrcFormat, aDstFormat), \
200 ShouldForceOpaque(aSrcFormat, aDstFormat)>)
202 template <bool aSwapRB
>
203 void Unpremultiply_NEON(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize
);
205 # define UNPREMULTIPLY_NEON(aSrcFormat, aDstFormat) \
206 FORMAT_CASE(aSrcFormat, aDstFormat, \
207 Unpremultiply_NEON<ShouldSwapRB(aSrcFormat, aDstFormat)>)
209 template <bool aSwapRB
>
210 void UnpremultiplyRow_NEON(const uint8_t*, uint8_t*, int32_t);
212 # define UNPREMULTIPLY_ROW_NEON(aSrcFormat, aDstFormat) \
214 aSrcFormat, aDstFormat, \
215 UnpremultiplyRow_NEON<ShouldSwapRB(aSrcFormat, aDstFormat)>)
217 template <bool aSwapRB
, bool aOpaqueAlpha
>
218 void Swizzle_NEON(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize
);
220 # define SWIZZLE_NEON(aSrcFormat, aDstFormat) \
221 FORMAT_CASE(aSrcFormat, aDstFormat, \
222 Swizzle_NEON<ShouldSwapRB(aSrcFormat, aDstFormat), \
223 ShouldForceOpaque(aSrcFormat, aDstFormat)>)
225 template <bool aSwapRB
, bool aOpaqueAlpha
>
226 void SwizzleRow_NEON(const uint8_t*, uint8_t*, int32_t);
228 # define SWIZZLE_ROW_NEON(aSrcFormat, aDstFormat) \
230 aSrcFormat, aDstFormat, \
231 SwizzleRow_NEON<ShouldSwapRB(aSrcFormat, aDstFormat), \
232 ShouldForceOpaque(aSrcFormat, aDstFormat)>)
234 template <bool aSwapRB
>
235 void UnpackRowRGB24_NEON(const uint8_t*, uint8_t*, int32_t);
237 # define UNPACK_ROW_RGB_NEON(aDstFormat) \
239 SurfaceFormat::R8G8B8, aDstFormat, \
240 UnpackRowRGB24_NEON<ShouldSwapRB(SurfaceFormat::R8G8B8, aDstFormat)>)
// Fallback premultiply implementation that uses splayed pixel math to reduce
// the multiplications used. That is, the R and B components are isolated from
// the G and A components, which then can be multiplied as if they were two
// 2-component vectors. Otherwise, an approximation if divide-by-255 is used
// which is faster than an actual division. These optimizations are also used
// for the SSE2 and NEON implementations.
template <bool aSwapRB, bool aOpaqueAlpha, uint32_t aSrcRGBShift,
          uint32_t aSrcAShift, uint32_t aDstRGBShift, uint32_t aDstAShift>
static void PremultiplyChunkFallback(const uint8_t*& aSrc, uint8_t*& aDst,
                                     int32_t aLength) {
  const uint8_t* end = aSrc + 4 * aLength;
  do {
    // Load and process 1 entire pixel at a time.
    uint32_t color = *reinterpret_cast<const uint32_t*>(aSrc);

    uint32_t a = aSrcAShift ? color >> aSrcAShift : color & 0xFF;

    // Isolate the R and B components.
    uint32_t rb = (color >> aSrcRGBShift) & 0x00FF00FF;
    // Swap the order of R and B if necessary.
    if (aSwapRB) {
      rb = (rb >> 16) | (rb << 16);
    }
    // Approximate the multiply by alpha and divide by 255 which is
    // c = c*a + 255; c = (c + (c >> 8)) >> 8;
    // However, we omit the final >> 8 to fold it with the final shift into
    // place depending on desired output format.
    rb = rb * a + 0x00FF00FF;
    rb = (rb + ((rb >> 8) & 0x00FF00FF)) & 0xFF00FF00;

    // Use same approximation as above, but G is shifted 8 bits left.
    // Alpha is left out and handled separately.
    uint32_t g = color & (0xFF00 << aSrcRGBShift);
    g = g * a + (0xFF00 << aSrcRGBShift);
    g = (g + (g >> 8)) & (0xFF0000 << aSrcRGBShift);

    // The above math leaves RGB shifted left by 8 bits.
    // Shift them right if required for the output format.
    // then combine them back together to produce output pixel.
    // Add the alpha back on if the output format is not opaque.
    *reinterpret_cast<uint32_t*>(aDst) =
        (rb >> (8 - aDstRGBShift)) | (g >> (8 + aSrcRGBShift - aDstRGBShift)) |
        (aOpaqueAlpha ? 0xFF << aDstAShift : a << aDstAShift);

    aSrc += 4;
    aDst += 4;
  } while (aSrc < end);
}
297 template <bool aSwapRB
, bool aOpaqueAlpha
, uint32_t aSrcRGBShift
,
298 uint32_t aSrcAShift
, uint32_t aDstRGBShift
, uint32_t aDstAShift
>
299 static void PremultiplyRowFallback(const uint8_t* aSrc
, uint8_t* aDst
,
301 PremultiplyChunkFallback
<aSwapRB
, aOpaqueAlpha
, aSrcRGBShift
, aSrcAShift
,
302 aDstRGBShift
, aDstAShift
>(aSrc
, aDst
, aLength
);
305 template <bool aSwapRB
, bool aOpaqueAlpha
, uint32_t aSrcRGBShift
,
306 uint32_t aSrcAShift
, uint32_t aDstRGBShift
, uint32_t aDstAShift
>
307 static void PremultiplyFallback(const uint8_t* aSrc
, int32_t aSrcGap
,
308 uint8_t* aDst
, int32_t aDstGap
, IntSize aSize
) {
309 for (int32_t height
= aSize
.height
; height
> 0; height
--) {
310 PremultiplyChunkFallback
<aSwapRB
, aOpaqueAlpha
, aSrcRGBShift
, aSrcAShift
,
311 aDstRGBShift
, aDstAShift
>(aSrc
, aDst
, aSize
.width
);
317 #define PREMULTIPLY_FALLBACK_CASE(aSrcFormat, aDstFormat) \
319 aSrcFormat, aDstFormat, \
320 PremultiplyFallback<ShouldSwapRB(aSrcFormat, aDstFormat), \
321 ShouldForceOpaque(aSrcFormat, aDstFormat), \
322 RGBBitShift(aSrcFormat), AlphaBitShift(aSrcFormat), \
323 RGBBitShift(aDstFormat), AlphaBitShift(aDstFormat)>)
325 #define PREMULTIPLY_FALLBACK(aSrcFormat) \
326 PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8A8) \
327 PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8X8) \
328 PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8A8) \
329 PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8X8) \
330 PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::A8R8G8B8) \
331 PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::X8R8G8B8)
333 #define PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, aDstFormat) \
334 FORMAT_CASE_ROW(aSrcFormat, aDstFormat, \
335 PremultiplyRowFallback< \
336 ShouldSwapRB(aSrcFormat, aDstFormat), \
337 ShouldForceOpaque(aSrcFormat, aDstFormat), \
338 RGBBitShift(aSrcFormat), AlphaBitShift(aSrcFormat), \
339 RGBBitShift(aDstFormat), AlphaBitShift(aDstFormat)>)
341 #define PREMULTIPLY_ROW_FALLBACK(aSrcFormat) \
342 PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8A8) \
343 PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8X8) \
344 PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8A8) \
345 PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8X8) \
346 PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::A8R8G8B8) \
347 PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::X8R8G8B8)
349 // If rows are tightly packed, and the size of the total area will fit within
350 // the precision range of a single row, then process all the data as if it was
352 static inline IntSize
CollapseSize(const IntSize
& aSize
, int32_t aSrcStride
,
353 int32_t aDstStride
) {
354 if (aSrcStride
== aDstStride
&& (aSrcStride
& 3) == 0 &&
355 aSrcStride
/ 4 == aSize
.width
) {
356 CheckedInt32 area
= CheckedInt32(aSize
.width
) * CheckedInt32(aSize
.height
);
357 if (area
.isValid()) {
358 return IntSize(area
.value(), 1);
364 static inline int32_t GetStrideGap(int32_t aWidth
, SurfaceFormat aFormat
,
366 CheckedInt32 used
= CheckedInt32(aWidth
) * BytesPerPixel(aFormat
);
367 if (!used
.isValid() || used
.value() < 0) {
370 return aStride
- used
.value();
373 bool PremultiplyData(const uint8_t* aSrc
, int32_t aSrcStride
,
374 SurfaceFormat aSrcFormat
, uint8_t* aDst
,
375 int32_t aDstStride
, SurfaceFormat aDstFormat
,
376 const IntSize
& aSize
) {
377 if (aSize
.IsEmpty()) {
380 IntSize size
= CollapseSize(aSize
, aSrcStride
, aDstStride
);
381 // Find gap from end of row to the start of the next row.
382 int32_t srcGap
= GetStrideGap(aSize
.width
, aSrcFormat
, aSrcStride
);
383 int32_t dstGap
= GetStrideGap(aSize
.width
, aDstFormat
, aDstStride
);
384 MOZ_ASSERT(srcGap
>= 0 && dstGap
>= 0);
385 if (srcGap
< 0 || dstGap
< 0) {
389 #define FORMAT_CASE_CALL(...) __VA_ARGS__(aSrc, srcGap, aDst, dstGap, size)
392 if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat
, aDstFormat
)) {
393 PREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8
, SurfaceFormat::B8G8R8A8
)
394 PREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8
, SurfaceFormat::B8G8R8X8
)
395 PREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8
, SurfaceFormat::R8G8B8A8
)
396 PREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8
, SurfaceFormat::R8G8B8X8
)
397 PREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8
, SurfaceFormat::R8G8B8A8
)
398 PREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8
, SurfaceFormat::R8G8B8X8
)
399 PREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8
, SurfaceFormat::B8G8R8A8
)
400 PREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8
, SurfaceFormat::B8G8R8X8
)
407 if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat
, aDstFormat
)) {
408 PREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8
, SurfaceFormat::B8G8R8A8
)
409 PREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8
, SurfaceFormat::B8G8R8X8
)
410 PREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8
, SurfaceFormat::R8G8B8A8
)
411 PREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8
, SurfaceFormat::R8G8B8X8
)
412 PREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8
, SurfaceFormat::R8G8B8A8
)
413 PREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8
, SurfaceFormat::R8G8B8X8
)
414 PREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8
, SurfaceFormat::B8G8R8A8
)
415 PREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8
, SurfaceFormat::B8G8R8X8
)
421 switch (FORMAT_KEY(aSrcFormat
, aDstFormat
)) {
422 PREMULTIPLY_FALLBACK(SurfaceFormat::B8G8R8A8
)
423 PREMULTIPLY_FALLBACK(SurfaceFormat::R8G8B8A8
)
424 PREMULTIPLY_FALLBACK(SurfaceFormat::A8R8G8B8
)
429 #undef FORMAT_CASE_CALL
431 MOZ_ASSERT(false, "Unsupported premultiply formats");
435 SwizzleRowFn
PremultiplyRow(SurfaceFormat aSrcFormat
,
436 SurfaceFormat aDstFormat
) {
438 if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat
, aDstFormat
)) {
439 PREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8
, SurfaceFormat::B8G8R8A8
)
440 PREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8
, SurfaceFormat::B8G8R8X8
)
441 PREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8
, SurfaceFormat::R8G8B8A8
)
442 PREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8
, SurfaceFormat::R8G8B8X8
)
443 PREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8
, SurfaceFormat::R8G8B8A8
)
444 PREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8
, SurfaceFormat::R8G8B8X8
)
445 PREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8
, SurfaceFormat::B8G8R8A8
)
446 PREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8
, SurfaceFormat::B8G8R8X8
)
453 if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat
, aDstFormat
)) {
454 PREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8
, SurfaceFormat::B8G8R8A8
)
455 PREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8
, SurfaceFormat::B8G8R8X8
)
456 PREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8
, SurfaceFormat::R8G8B8A8
)
457 PREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8
, SurfaceFormat::R8G8B8X8
)
458 PREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8
, SurfaceFormat::R8G8B8A8
)
459 PREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8
, SurfaceFormat::R8G8B8X8
)
460 PREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8
, SurfaceFormat::B8G8R8A8
)
461 PREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8
, SurfaceFormat::B8G8R8X8
)
467 switch (FORMAT_KEY(aSrcFormat
, aDstFormat
)) {
468 PREMULTIPLY_ROW_FALLBACK(SurfaceFormat::B8G8R8A8
)
469 PREMULTIPLY_ROW_FALLBACK(SurfaceFormat::R8G8B8A8
)
470 PREMULTIPLY_ROW_FALLBACK(SurfaceFormat::A8R8G8B8
)
475 MOZ_ASSERT_UNREACHABLE("Unsupported premultiply formats");
// Generate a table of 8.16 fixed-point reciprocals representing 1/alpha.
// Entry 0 is 0 so that fully transparent pixels unpremultiply to 0.
#define UNPREMULQ(x) (0xFF00FFU / (x))
#define UNPREMULQ_2(x) UNPREMULQ(x), UNPREMULQ((x) + 1)
#define UNPREMULQ_4(x) UNPREMULQ_2(x), UNPREMULQ_2((x) + 2)
#define UNPREMULQ_8(x) UNPREMULQ_4(x), UNPREMULQ_4((x) + 4)
#define UNPREMULQ_16(x) UNPREMULQ_8(x), UNPREMULQ_8((x) + 8)
#define UNPREMULQ_32(x) UNPREMULQ_16(x), UNPREMULQ_16((x) + 16)
static const uint32_t sUnpremultiplyTable[256] = {0,
                                                  UNPREMULQ(1),
                                                  UNPREMULQ_2(2),
                                                  UNPREMULQ_4(4),
                                                  UNPREMULQ_8(8),
                                                  UNPREMULQ_16(16),
                                                  UNPREMULQ_32(32),
                                                  UNPREMULQ_32(64),
                                                  UNPREMULQ_32(96),
                                                  UNPREMULQ_32(128),
                                                  UNPREMULQ_32(160),
                                                  UNPREMULQ_32(192),
                                                  UNPREMULQ_32(224)};
504 // Fallback unpremultiply implementation that uses 8.16 fixed-point reciprocal
505 // math to eliminate any division by the alpha component. This optimization is
506 // used for the SSE2 and NEON implementations, with some adaptations. This
507 // implementation also accesses color components using individual byte accesses
508 // as this profiles faster than accessing the pixel as a uint32_t and
509 // shifting/masking to access components.
510 template <bool aSwapRB
, uint32_t aSrcRGBIndex
, uint32_t aSrcAIndex
,
511 uint32_t aDstRGBIndex
, uint32_t aDstAIndex
>
512 static void UnpremultiplyChunkFallback(const uint8_t*& aSrc
, uint8_t*& aDst
,
514 const uint8_t* end
= aSrc
+ 4 * aLength
;
516 uint8_t r
= aSrc
[aSrcRGBIndex
+ (aSwapRB
? 2 : 0)];
517 uint8_t g
= aSrc
[aSrcRGBIndex
+ 1];
518 uint8_t b
= aSrc
[aSrcRGBIndex
+ (aSwapRB
? 0 : 2)];
519 uint8_t a
= aSrc
[aSrcAIndex
];
521 // Access the 8.16 reciprocal from the table based on alpha. Multiply by
522 // the reciprocal and shift off the fraction bits to approximate the
523 // division by alpha.
524 uint32_t q
= sUnpremultiplyTable
[a
];
525 aDst
[aDstRGBIndex
+ 0] = (r
* q
) >> 16;
526 aDst
[aDstRGBIndex
+ 1] = (g
* q
) >> 16;
527 aDst
[aDstRGBIndex
+ 2] = (b
* q
) >> 16;
528 aDst
[aDstAIndex
] = a
;
532 } while (aSrc
< end
);
535 template <bool aSwapRB
, uint32_t aSrcRGBIndex
, uint32_t aSrcAIndex
,
536 uint32_t aDstRGBIndex
, uint32_t aDstAIndex
>
537 static void UnpremultiplyRowFallback(const uint8_t* aSrc
, uint8_t* aDst
,
539 UnpremultiplyChunkFallback
<aSwapRB
, aSrcRGBIndex
, aSrcAIndex
, aDstRGBIndex
,
540 aDstAIndex
>(aSrc
, aDst
, aLength
);
543 template <bool aSwapRB
, uint32_t aSrcRGBIndex
, uint32_t aSrcAIndex
,
544 uint32_t aDstRGBIndex
, uint32_t aDstAIndex
>
545 static void UnpremultiplyFallback(const uint8_t* aSrc
, int32_t aSrcGap
,
546 uint8_t* aDst
, int32_t aDstGap
,
548 for (int32_t height
= aSize
.height
; height
> 0; height
--) {
549 UnpremultiplyChunkFallback
<aSwapRB
, aSrcRGBIndex
, aSrcAIndex
, aDstRGBIndex
,
550 aDstAIndex
>(aSrc
, aDst
, aSize
.width
);
556 #define UNPREMULTIPLY_FALLBACK_CASE(aSrcFormat, aDstFormat) \
557 FORMAT_CASE(aSrcFormat, aDstFormat, \
558 UnpremultiplyFallback< \
559 ShouldSwapRB(aSrcFormat, aDstFormat), \
560 RGBByteIndex(aSrcFormat), AlphaByteIndex(aSrcFormat), \
561 RGBByteIndex(aDstFormat), AlphaByteIndex(aDstFormat)>)
563 #define UNPREMULTIPLY_FALLBACK(aSrcFormat) \
564 UNPREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8A8) \
565 UNPREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8A8) \
566 UNPREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::A8R8G8B8)
568 #define UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, aDstFormat) \
569 FORMAT_CASE_ROW(aSrcFormat, aDstFormat, \
570 UnpremultiplyRowFallback< \
571 ShouldSwapRB(aSrcFormat, aDstFormat), \
572 RGBByteIndex(aSrcFormat), AlphaByteIndex(aSrcFormat), \
573 RGBByteIndex(aDstFormat), AlphaByteIndex(aDstFormat)>)
575 #define UNPREMULTIPLY_ROW_FALLBACK(aSrcFormat) \
576 UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8A8) \
577 UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8A8) \
578 UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::A8R8G8B8)
580 bool UnpremultiplyData(const uint8_t* aSrc
, int32_t aSrcStride
,
581 SurfaceFormat aSrcFormat
, uint8_t* aDst
,
582 int32_t aDstStride
, SurfaceFormat aDstFormat
,
583 const IntSize
& aSize
) {
584 if (aSize
.IsEmpty()) {
587 IntSize size
= CollapseSize(aSize
, aSrcStride
, aDstStride
);
588 // Find gap from end of row to the start of the next row.
589 int32_t srcGap
= GetStrideGap(aSize
.width
, aSrcFormat
, aSrcStride
);
590 int32_t dstGap
= GetStrideGap(aSize
.width
, aDstFormat
, aDstStride
);
591 MOZ_ASSERT(srcGap
>= 0 && dstGap
>= 0);
592 if (srcGap
< 0 || dstGap
< 0) {
596 #define FORMAT_CASE_CALL(...) __VA_ARGS__(aSrc, srcGap, aDst, dstGap, size)
599 if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat
, aDstFormat
)) {
600 UNPREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8
, SurfaceFormat::B8G8R8A8
)
601 UNPREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8
, SurfaceFormat::R8G8B8A8
)
602 UNPREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8
, SurfaceFormat::R8G8B8A8
)
603 UNPREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8
, SurfaceFormat::B8G8R8A8
)
610 if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat
, aDstFormat
)) {
611 UNPREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8
, SurfaceFormat::B8G8R8A8
)
612 UNPREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8
, SurfaceFormat::R8G8B8A8
)
613 UNPREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8
, SurfaceFormat::R8G8B8A8
)
614 UNPREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8
, SurfaceFormat::B8G8R8A8
)
620 switch (FORMAT_KEY(aSrcFormat
, aDstFormat
)) {
621 UNPREMULTIPLY_FALLBACK(SurfaceFormat::B8G8R8A8
)
622 UNPREMULTIPLY_FALLBACK(SurfaceFormat::R8G8B8A8
)
623 UNPREMULTIPLY_FALLBACK(SurfaceFormat::A8R8G8B8
)
628 #undef FORMAT_CASE_CALL
630 MOZ_ASSERT(false, "Unsupported unpremultiply formats");
634 SwizzleRowFn
UnpremultiplyRow(SurfaceFormat aSrcFormat
,
635 SurfaceFormat aDstFormat
) {
637 if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat
, aDstFormat
)) {
638 UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8
, SurfaceFormat::B8G8R8A8
)
639 UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8
, SurfaceFormat::R8G8B8A8
)
640 UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8
, SurfaceFormat::R8G8B8A8
)
641 UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8
, SurfaceFormat::B8G8R8A8
)
648 if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat
, aDstFormat
)) {
649 UNPREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8
, SurfaceFormat::B8G8R8A8
)
650 UNPREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8
, SurfaceFormat::R8G8B8A8
)
651 UNPREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8
, SurfaceFormat::R8G8B8A8
)
652 UNPREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8
, SurfaceFormat::B8G8R8A8
)
658 switch (FORMAT_KEY(aSrcFormat
, aDstFormat
)) {
659 UNPREMULTIPLY_ROW_FALLBACK(SurfaceFormat::B8G8R8A8
)
660 UNPREMULTIPLY_ROW_FALLBACK(SurfaceFormat::R8G8B8A8
)
661 UNPREMULTIPLY_ROW_FALLBACK(SurfaceFormat::A8R8G8B8
)
666 MOZ_ASSERT_UNREACHABLE("Unsupported premultiply formats");
// Fallback swizzle implementation that uses shifting and masking to reorder
// pixels.
template <bool aSwapRB, bool aOpaqueAlpha, uint32_t aSrcRGBShift,
          uint32_t aSrcAShift, uint32_t aDstRGBShift, uint32_t aDstAShift>
static void SwizzleChunkFallback(const uint8_t*& aSrc, uint8_t*& aDst,
                                 int32_t aLength) {
  const uint8_t* end = aSrc + 4 * aLength;
  do {
    uint32_t rgba = *reinterpret_cast<const uint32_t*>(aSrc);

    if (aSwapRB) {
      // Handle R and B swaps by exchanging words and masking.
      uint32_t rb =
          ((rgba << 16) | (rgba >> 16)) & (0x00FF00FF << aSrcRGBShift);
      uint32_t ga = rgba & ((0xFF << aSrcAShift) | (0xFF00 << aSrcRGBShift));
      rgba = rb | ga;
    }

    // If src and dst shifts differ, rotate left or right to move RGB into
    // place, i.e. ARGB -> RGBA or ARGB -> RGBA.
    if (aDstRGBShift > aSrcRGBShift) {
      rgba = (rgba << 8) | (aOpaqueAlpha ? 0x000000FF : rgba >> 24);
    } else if (aSrcRGBShift > aDstRGBShift) {
      rgba = (rgba >> 8) | (aOpaqueAlpha ? 0xFF000000 : rgba << 24);
    } else if (aOpaqueAlpha) {
      rgba |= 0xFF << aDstAShift;
    }

    *reinterpret_cast<uint32_t*>(aDst) = rgba;

    aSrc += 4;
    aDst += 4;
  } while (aSrc < end);
}
709 template <bool aSwapRB
, bool aOpaqueAlpha
, uint32_t aSrcRGBShift
,
710 uint32_t aSrcAShift
, uint32_t aDstRGBShift
, uint32_t aDstAShift
>
711 static void SwizzleRowFallback(const uint8_t* aSrc
, uint8_t* aDst
,
713 SwizzleChunkFallback
<aSwapRB
, aOpaqueAlpha
, aSrcRGBShift
, aSrcAShift
,
714 aDstRGBShift
, aDstAShift
>(aSrc
, aDst
, aLength
);
717 template <bool aSwapRB
, bool aOpaqueAlpha
, uint32_t aSrcRGBShift
,
718 uint32_t aSrcAShift
, uint32_t aDstRGBShift
, uint32_t aDstAShift
>
719 static void SwizzleFallback(const uint8_t* aSrc
, int32_t aSrcGap
, uint8_t* aDst
,
720 int32_t aDstGap
, IntSize aSize
) {
721 for (int32_t height
= aSize
.height
; height
> 0; height
--) {
722 SwizzleChunkFallback
<aSwapRB
, aOpaqueAlpha
, aSrcRGBShift
, aSrcAShift
,
723 aDstRGBShift
, aDstAShift
>(aSrc
, aDst
, aSize
.width
);
729 #define SWIZZLE_FALLBACK(aSrcFormat, aDstFormat) \
731 aSrcFormat, aDstFormat, \
732 SwizzleFallback<ShouldSwapRB(aSrcFormat, aDstFormat), \
733 ShouldForceOpaque(aSrcFormat, aDstFormat), \
734 RGBBitShift(aSrcFormat), AlphaBitShift(aSrcFormat), \
735 RGBBitShift(aDstFormat), AlphaBitShift(aDstFormat)>)
737 #define SWIZZLE_ROW_FALLBACK(aSrcFormat, aDstFormat) \
739 aSrcFormat, aDstFormat, \
740 SwizzleRowFallback<ShouldSwapRB(aSrcFormat, aDstFormat), \
741 ShouldForceOpaque(aSrcFormat, aDstFormat), \
742 RGBBitShift(aSrcFormat), AlphaBitShift(aSrcFormat), \
743 RGBBitShift(aDstFormat), AlphaBitShift(aDstFormat)>)
// Fast-path for matching formats: a row conversion is just a copy (or a
// no-op when converting in place).
template <int32_t aBytesPerPixel>
static void SwizzleRowCopy(const uint8_t* aSrc, uint8_t* aDst,
                           int32_t aLength) {
  if (aSrc != aDst) {
    memcpy(aDst, aSrc, aLength * aBytesPerPixel);
  }
}
754 // Fast-path for matching formats.
755 static void SwizzleCopy(const uint8_t* aSrc
, int32_t aSrcGap
, uint8_t* aDst
,
756 int32_t aDstGap
, IntSize aSize
, int32_t aBPP
) {
758 int32_t rowLength
= aBPP
* aSize
.width
;
759 for (int32_t height
= aSize
.height
; height
> 0; height
--) {
760 memcpy(aDst
, aSrc
, rowLength
);
761 aSrc
+= rowLength
+ aSrcGap
;
762 aDst
+= rowLength
+ aDstGap
;
767 // Fast-path for conversions that swap all bytes.
768 template <bool aOpaqueAlpha
, uint32_t aSrcAShift
, uint32_t aDstAShift
>
769 static void SwizzleChunkSwap(const uint8_t*& aSrc
, uint8_t*& aDst
,
771 const uint8_t* end
= aSrc
+ 4 * aLength
;
773 // Use an endian swap to move the bytes, i.e. BGRA -> ARGB.
774 uint32_t rgba
= *reinterpret_cast<const uint32_t*>(aSrc
);
775 #if MOZ_LITTLE_ENDIAN()
776 rgba
= NativeEndian::swapToBigEndian(rgba
);
778 rgba
= NativeEndian::swapToLittleEndian(rgba
);
781 rgba
|= 0xFF << aDstAShift
;
783 *reinterpret_cast<uint32_t*>(aDst
) = rgba
;
786 } while (aSrc
< end
);
789 template <bool aOpaqueAlpha
, uint32_t aSrcAShift
, uint32_t aDstAShift
>
790 static void SwizzleRowSwap(const uint8_t* aSrc
, uint8_t* aDst
,
792 SwizzleChunkSwap
<aOpaqueAlpha
, aSrcAShift
, aDstAShift
>(aSrc
, aDst
, aLength
);
795 template <bool aOpaqueAlpha
, uint32_t aSrcAShift
, uint32_t aDstAShift
>
796 static void SwizzleSwap(const uint8_t* aSrc
, int32_t aSrcGap
, uint8_t* aDst
,
797 int32_t aDstGap
, IntSize aSize
) {
798 for (int32_t height
= aSize
.height
; height
> 0; height
--) {
799 SwizzleChunkSwap
<aOpaqueAlpha
, aSrcAShift
, aDstAShift
>(aSrc
, aDst
,
806 #define SWIZZLE_SWAP(aSrcFormat, aDstFormat) \
808 aSrcFormat, aDstFormat, \
809 SwizzleSwap<ShouldForceOpaque(aSrcFormat, aDstFormat), \
810 AlphaBitShift(aSrcFormat), AlphaBitShift(aDstFormat)>)
812 #define SWIZZLE_ROW_SWAP(aSrcFormat, aDstFormat) \
814 aSrcFormat, aDstFormat, \
815 SwizzleRowSwap<ShouldForceOpaque(aSrcFormat, aDstFormat), \
816 AlphaBitShift(aSrcFormat), AlphaBitShift(aDstFormat)>)
// Swaps the R and B components of tightly packed 3-byte RGB pixels,
// i.e. RGB <-> BGR.
static void SwizzleChunkSwapRGB24(const uint8_t*& aSrc, uint8_t*& aDst,
                                  int32_t aLength) {
  const uint8_t* end = aSrc + 3 * aLength;
  do {
    uint8_t r = aSrc[0];
    uint8_t g = aSrc[1];
    uint8_t b = aSrc[2];
    aDst[0] = b;
    aDst[1] = g;
    aDst[2] = r;
    aSrc += 3;
    aDst += 3;
  } while (aSrc < end);
}
833 static void SwizzleRowSwapRGB24(const uint8_t* aSrc
, uint8_t* aDst
,
835 SwizzleChunkSwapRGB24(aSrc
, aDst
, aLength
);
838 static void SwizzleSwapRGB24(const uint8_t* aSrc
, int32_t aSrcGap
,
839 uint8_t* aDst
, int32_t aDstGap
, IntSize aSize
) {
840 for (int32_t height
= aSize
.height
; height
> 0; height
--) {
841 SwizzleChunkSwapRGB24(aSrc
, aDst
, aSize
.width
);
847 #define SWIZZLE_SWAP_RGB24(aSrcFormat, aDstFormat) \
848 FORMAT_CASE(aSrcFormat, aDstFormat, SwizzleSwapRGB24)
850 #define SWIZZLE_ROW_SWAP_RGB24(aSrcFormat, aDstFormat) \
851 FORMAT_CASE_ROW(aSrcFormat, aDstFormat, SwizzleRowSwapRGB24)
// Fast-path for conversions that force alpha to opaque, updating in place.
template <uint32_t aDstAShift>
static void SwizzleChunkOpaqueUpdate(uint8_t*& aBuffer, int32_t aLength) {
  const uint8_t* end = aBuffer + 4 * aLength;
  do {
    uint32_t rgba = *reinterpret_cast<const uint32_t*>(aBuffer);
    // Just add on the alpha bits to the source.
    rgba |= 0xFF << aDstAShift;
    *reinterpret_cast<uint32_t*>(aBuffer) = rgba;
    aBuffer += 4;
  } while (aBuffer < end);
}
// Fast-path for conversions that force alpha to opaque while copying to a
// separate destination buffer.
template <uint32_t aDstAShift>
static void SwizzleChunkOpaqueCopy(const uint8_t*& aSrc, uint8_t* aDst,
                                   int32_t aLength) {
  const uint8_t* end = aSrc + 4 * aLength;
  do {
    uint32_t rgba = *reinterpret_cast<const uint32_t*>(aSrc);
    // Just add on the alpha bits to the source.
    rgba |= 0xFF << aDstAShift;
    *reinterpret_cast<uint32_t*>(aDst) = rgba;
    aSrc += 4;
    aDst += 4;
  } while (aSrc < end);
}
880 template <uint32_t aDstAShift
>
881 static void SwizzleRowOpaque(const uint8_t* aSrc
, uint8_t* aDst
,
884 SwizzleChunkOpaqueUpdate
<aDstAShift
>(aDst
, aLength
);
886 SwizzleChunkOpaqueCopy
<aDstAShift
>(aSrc
, aDst
, aLength
);
890 template <uint32_t aDstAShift
>
891 static void SwizzleOpaque(const uint8_t* aSrc
, int32_t aSrcGap
, uint8_t* aDst
,
892 int32_t aDstGap
, IntSize aSize
) {
894 // Modifying in-place, so just write out the alpha.
895 for (int32_t height
= aSize
.height
; height
> 0; height
--) {
896 SwizzleChunkOpaqueUpdate
<aDstAShift
>(aDst
, aSize
.width
);
900 for (int32_t height
= aSize
.height
; height
> 0; height
--) {
901 SwizzleChunkOpaqueCopy
<aDstAShift
>(aSrc
, aDst
, aSize
.width
);
908 #define SWIZZLE_OPAQUE(aSrcFormat, aDstFormat) \
909 FORMAT_CASE(aSrcFormat, aDstFormat, SwizzleOpaque<AlphaBitShift(aDstFormat)>)
911 #define SWIZZLE_ROW_OPAQUE(aSrcFormat, aDstFormat) \
912 FORMAT_CASE_ROW(aSrcFormat, aDstFormat, \
913 SwizzleRowOpaque<AlphaBitShift(aDstFormat)>)
// Packing of 32-bit formats to RGB565.
// Each 8-bit component is truncated to its top bits (5/6/5 for R/G/B) and
// packed into a 16-bit word. aSrcRGBShift is the bit offset of the RGB
// components within the source 32-bit pixel (0 or 8 for leading-alpha).
template <bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex>
static void PackToRGB565(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
                         int32_t aDstGap, IntSize aSize) {
  for (int32_t height = aSize.height; height > 0; height--) {
    const uint8_t* end = aSrc + 4 * aSize.width;
    do {
      uint32_t rgba = *reinterpret_cast<const uint32_t*>(aSrc);

      // Isolate the R, G, and B components and shift to final endian-dependent
      // locations.
      uint16_t rgb565;
      if (aSwapRB) {
        // Swapped order: low byte becomes the high 5 bits of the 565 word.
        rgb565 = ((rgba & (0xF8 << aSrcRGBShift)) << (8 - aSrcRGBShift)) |
                 ((rgba & (0xFC00 << aSrcRGBShift)) >> (5 + aSrcRGBShift)) |
                 ((rgba & (0xF80000 << aSrcRGBShift)) >> (19 + aSrcRGBShift));
      } else {
        rgb565 = ((rgba & (0xF8 << aSrcRGBShift)) >> (3 + aSrcRGBShift)) |
                 ((rgba & (0xFC00 << aSrcRGBShift)) >> (5 + aSrcRGBShift)) |
                 ((rgba & (0xF80000 << aSrcRGBShift)) >> (8 + aSrcRGBShift));
      }

      *reinterpret_cast<uint16_t*>(aDst) = rgb565;

      aSrc += 4;
      aDst += 2;
    } while (aSrc < end);

    aSrc += aSrcGap;
    aDst += aDstGap;
  }
}
// Packing of 32-bit formats to 24-bit formats.
// Consumes aLength 32-bit pixels from aSrc and emits 3 bytes (R, G, B in
// destination order) per pixel to aDst. Both pointers are taken by reference
// and left pointing just past the converted data, so callers can continue
// packing subsequent chunks. Assumes aLength > 0 (do-while).
template <bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex>
static void PackChunkToRGB24(const uint8_t*& aSrc, uint8_t*& aDst,
                             int32_t aLength) {
  const uint8_t* sentinel = aSrc + 4 * aLength;
  do {
    // aSrcRGBIndex skips a leading alpha/padding byte when present;
    // aSwapRB flips which end of the pixel holds red vs. blue.
    const uint8_t* px = aSrc + aSrcRGBIndex;
    aDst[0] = px[aSwapRB ? 2 : 0];
    aDst[1] = px[1];
    aDst[2] = px[aSwapRB ? 0 : 2];

    aSrc += 4;
    aDst += 3;
  } while (aSrc < sentinel);
}
// Row-level entry point for 32-bit -> 24-bit packing (SwizzleRowFn shape).
// Local pointer copies are passed by reference into the chunk helper and
// discarded afterwards.
template <bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex>
static void PackRowToRGB24(const uint8_t* aSrc, uint8_t* aDst,
                           int32_t aLength) {
  PackChunkToRGB24<aSwapRB, aSrcRGBShift, aSrcRGBIndex>(aSrc, aDst, aLength);
}
// Surface-wide 32-bit -> 24-bit packing: one chunk per row, then skip the
// stride gaps between rows.
template <bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex>
static void PackToRGB24(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
                        int32_t aDstGap, IntSize aSize) {
  for (int32_t height = aSize.height; height > 0; height--) {
    PackChunkToRGB24<aSwapRB, aSrcRGBShift, aSrcRGBIndex>(aSrc, aDst,
                                                          aSize.width);
    aSrc += aSrcGap;
    aDst += aDstGap;
  }
}
// Dispatch-table helpers for packing: instantiate the pack function with the
// compile-time swap/shift/index attributes of each supported 32-bit source.
#define PACK_RGB_CASE(aSrcFormat, aDstFormat, aPackFunc)      \
  FORMAT_CASE(aSrcFormat, aDstFormat,                         \
              aPackFunc<ShouldSwapRB(aSrcFormat, aDstFormat), \
                        RGBBitShift(aSrcFormat), RGBByteIndex(aSrcFormat)>)

#define PACK_RGB(aDstFormat, aPackFunc)                           \
  PACK_RGB_CASE(SurfaceFormat::B8G8R8A8, aDstFormat, aPackFunc)   \
  PACK_RGB_CASE(SurfaceFormat::B8G8R8X8, aDstFormat, aPackFunc)   \
  PACK_RGB_CASE(SurfaceFormat::R8G8B8A8, aDstFormat, aPackFunc)   \
  PACK_RGB_CASE(SurfaceFormat::R8G8B8X8, aDstFormat, aPackFunc)   \
  PACK_RGB_CASE(SurfaceFormat::A8R8G8B8, aDstFormat, aPackFunc)   \
  PACK_RGB_CASE(SurfaceFormat::X8R8G8B8, aDstFormat, aPackFunc)

// Row-function analogue of PACK_RGB_CASE for the SwizzleRow dispatch table.
#define PACK_ROW_RGB_CASE(aSrcFormat, aDstFormat, aPackFunc)                   \
  FORMAT_CASE_ROW(                                                             \
      aSrcFormat, aDstFormat,                                                  \
      aPackFunc<ShouldSwapRB(aSrcFormat, aDstFormat), RGBBitShift(aSrcFormat), \
                RGBByteIndex(aSrcFormat)>)

#define PACK_ROW_RGB(aDstFormat, aPackFunc)                           \
  PACK_ROW_RGB_CASE(SurfaceFormat::B8G8R8A8, aDstFormat, aPackFunc)   \
  PACK_ROW_RGB_CASE(SurfaceFormat::B8G8R8X8, aDstFormat, aPackFunc)   \
  PACK_ROW_RGB_CASE(SurfaceFormat::R8G8B8A8, aDstFormat, aPackFunc)   \
  PACK_ROW_RGB_CASE(SurfaceFormat::R8G8B8X8, aDstFormat, aPackFunc)   \
  PACK_ROW_RGB_CASE(SurfaceFormat::A8R8G8B8, aDstFormat, aPackFunc)   \
  PACK_ROW_RGB_CASE(SurfaceFormat::X8R8G8B8, aDstFormat, aPackFunc)
// Packing of 32-bit formats to A8.
// aSrcAIndex is the byte offset of the alpha component within each 32-bit
// source pixel; everything else is discarded.
template <uint32_t aSrcAIndex>
static void PackToA8(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
                     int32_t aDstGap, IntSize aSize) {
  for (int32_t height = aSize.height; height > 0; height--) {
    const uint8_t* end = aSrc + 4 * aSize.width;
    do {
      // Keep only the alpha byte of this pixel.
      *aDst++ = aSrc[aSrcAIndex];
      aSrc += 4;
    } while (aSrc < end);
    aSrc += aSrcGap;
    aDst += aDstGap;
  }
}
// Dispatch-table helpers for alpha extraction; only formats that actually
// carry alpha are listed.
#define PACK_ALPHA_CASE(aSrcFormat, aDstFormat, aPackFunc) \
  FORMAT_CASE(aSrcFormat, aDstFormat, aPackFunc<AlphaByteIndex(aSrcFormat)>)

#define PACK_ALPHA(aDstFormat, aPackFunc)                         \
  PACK_ALPHA_CASE(SurfaceFormat::B8G8R8A8, aDstFormat, aPackFunc) \
  PACK_ALPHA_CASE(SurfaceFormat::R8G8B8A8, aDstFormat, aPackFunc) \
  PACK_ALPHA_CASE(SurfaceFormat::A8R8G8B8, aDstFormat, aPackFunc)
1034 template <bool aSwapRB
>
1035 void UnpackRowRGB24(const uint8_t* aSrc
, uint8_t* aDst
, int32_t aLength
) {
1036 // Because we are expanding, we can only process the data back to front in
1037 // case we are performing this in place.
1038 const uint8_t* src
= aSrc
+ 3 * (aLength
- 1);
1039 uint32_t* dst
= reinterpret_cast<uint32_t*>(aDst
+ 4 * aLength
);
1040 while (src
>= aSrc
) {
1041 uint8_t r
= src
[aSwapRB
? 2 : 0];
1043 uint8_t b
= src
[aSwapRB
? 0 : 2];
1044 #if MOZ_LITTLE_ENDIAN()
1045 *--dst
= 0xFF000000 | (b
<< 16) | (g
<< 8) | r
;
1047 *--dst
= 0x000000FF | (b
<< 8) | (g
<< 16) | (r
<< 24);
1053 // Force instantiation of swizzle variants here.
1054 template void UnpackRowRGB24
<false>(const uint8_t*, uint8_t*, int32_t);
1055 template void UnpackRowRGB24
<true>(const uint8_t*, uint8_t*, int32_t);
// Dispatch-table entry for unpacking R8G8B8 rows into a 32-bit destination.
#define UNPACK_ROW_RGB(aDstFormat)                           \
  FORMAT_CASE_ROW(                                           \
      SurfaceFormat::R8G8B8, aDstFormat,                     \
      UnpackRowRGB24<ShouldSwapRB(SurfaceFormat::R8G8B8, aDstFormat)>)
1062 static void UnpackRowRGB24_To_ARGB(const uint8_t* aSrc
, uint8_t* aDst
,
1064 // Because we are expanding, we can only process the data back to front in
1065 // case we are performing this in place.
1066 const uint8_t* src
= aSrc
+ 3 * (aLength
- 1);
1067 uint32_t* dst
= reinterpret_cast<uint32_t*>(aDst
+ 4 * aLength
);
1068 while (src
>= aSrc
) {
1072 #if MOZ_LITTLE_ENDIAN()
1073 *--dst
= 0x000000FF | (r
<< 8) | (g
<< 16) | (b
<< 24);
1075 *--dst
= 0xFF000000 | (r
<< 24) | (g
<< 16) | b
;
// Dispatch-table entry for unpacking R8G8B8 rows into leading-alpha formats.
#define UNPACK_ROW_RGB_TO_ARGB(aDstFormat) \
  FORMAT_CASE_ROW(SurfaceFormat::R8G8B8, aDstFormat, UnpackRowRGB24_To_ARGB)
// Converts a whole surface from aSrcFormat to aDstFormat, trying SIMD
// (SSE2/NEON) variants first and falling back to the portable routines.
// Returns true on success; presumably false when the format pair is
// unsupported — confirm against the header's contract.
bool SwizzleData(const uint8_t* aSrc, int32_t aSrcStride,
                 SurfaceFormat aSrcFormat, uint8_t* aDst, int32_t aDstStride,
                 SurfaceFormat aDstFormat, const IntSize& aSize) {
  if (aSize.IsEmpty()) {
    return true;
  }
  // Collapse the surface to a single "row" when strides allow, so the inner
  // loops run over one long contiguous span.
  IntSize size = CollapseSize(aSize, aSrcStride, aDstStride);
  // Find gap from end of row to the start of the next row.
  int32_t srcGap = GetStrideGap(aSize.width, aSrcFormat, aSrcStride);
  int32_t dstGap = GetStrideGap(aSize.width, aDstFormat, aDstStride);
  MOZ_ASSERT(srcGap >= 0 && dstGap >= 0);
  if (srcGap < 0 || dstGap < 0) {
    return false;
  }

  // FORMAT_CASE uses this to invoke the selected swizzle with the locals
  // computed above.
#define FORMAT_CASE_CALL(...) __VA_ARGS__(aSrc, srcGap, aDst, dstGap, size)

#ifdef USE_SSE2
  if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      SWIZZLE_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_SSE2(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_SSE2(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      SWIZZLE_SSE2(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_SSE2(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8)
      default:
        break;
    }
#endif

#ifdef USE_NEON
  if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      SWIZZLE_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_NEON(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_NEON(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      SWIZZLE_NEON(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_NEON(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8)
      default:
        break;
    }
#endif

  // Portable fallbacks: RB swaps, alpha-position swaps, opaque fills,
  // and packing down to RGB565 / 24-bit / A8.
  switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
    SWIZZLE_FALLBACK(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_FALLBACK(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_FALLBACK(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_FALLBACK(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8)

    SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::A8R8G8B8)
    SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::X8R8G8B8)

    SWIZZLE_FALLBACK(SurfaceFormat::A8R8G8B8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_FALLBACK(SurfaceFormat::X8R8G8B8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_FALLBACK(SurfaceFormat::A8R8G8B8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_FALLBACK(SurfaceFormat::X8R8G8B8, SurfaceFormat::R8G8B8A8)

    SWIZZLE_SWAP(SurfaceFormat::B8G8R8A8, SurfaceFormat::A8R8G8B8)
    SWIZZLE_SWAP(SurfaceFormat::B8G8R8A8, SurfaceFormat::X8R8G8B8)
    SWIZZLE_SWAP(SurfaceFormat::B8G8R8X8, SurfaceFormat::X8R8G8B8)
    SWIZZLE_SWAP(SurfaceFormat::B8G8R8X8, SurfaceFormat::A8R8G8B8)
    SWIZZLE_SWAP(SurfaceFormat::A8R8G8B8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_SWAP(SurfaceFormat::A8R8G8B8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8A8)

    SWIZZLE_SWAP_RGB24(SurfaceFormat::R8G8B8, SurfaceFormat::B8G8R8)
    SWIZZLE_SWAP_RGB24(SurfaceFormat::B8G8R8, SurfaceFormat::R8G8B8)

    SWIZZLE_OPAQUE(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_OPAQUE(SurfaceFormat::B8G8R8X8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_OPAQUE(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_OPAQUE(SurfaceFormat::R8G8B8X8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_OPAQUE(SurfaceFormat::A8R8G8B8, SurfaceFormat::X8R8G8B8)
    SWIZZLE_OPAQUE(SurfaceFormat::X8R8G8B8, SurfaceFormat::A8R8G8B8)

    PACK_RGB(SurfaceFormat::R5G6B5_UINT16, PackToRGB565)
    PACK_RGB(SurfaceFormat::B8G8R8, PackToRGB24)
    PACK_RGB(SurfaceFormat::R8G8B8, PackToRGB24)
    PACK_ALPHA(SurfaceFormat::A8, PackToA8)

    default:
      break;
  }

  if (aSrcFormat == aDstFormat) {
    // If the formats match, just do a generic copy.
    SwizzleCopy(aSrc, srcGap, aDst, dstGap, size, BytesPerPixel(aSrcFormat));
    return true;
  }

#undef FORMAT_CASE_CALL

  MOZ_ASSERT(false, "Unsupported swizzle formats");
  return false;
}
// Returns a row-conversion function for the given format pair, preferring the
// widest available SIMD implementation (AVX2 > SSSE3 > SSE2 > NEON) before
// the portable fallbacks. Presumably returns null for unsupported pairs —
// confirm against the header's contract.
SwizzleRowFn SwizzleRow(SurfaceFormat aSrcFormat, SurfaceFormat aDstFormat) {
#ifdef USE_SSE2
  if (mozilla::supports_avx2()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      UNPACK_ROW_RGB_AVX2(SurfaceFormat::R8G8B8X8)
      UNPACK_ROW_RGB_AVX2(SurfaceFormat::R8G8B8A8)
      UNPACK_ROW_RGB_AVX2(SurfaceFormat::B8G8R8X8)
      UNPACK_ROW_RGB_AVX2(SurfaceFormat::B8G8R8A8)
      default:
        break;
    }

  if (mozilla::supports_ssse3()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      UNPACK_ROW_RGB_SSSE3(SurfaceFormat::R8G8B8X8)
      UNPACK_ROW_RGB_SSSE3(SurfaceFormat::R8G8B8A8)
      UNPACK_ROW_RGB_SSSE3(SurfaceFormat::B8G8R8X8)
      UNPACK_ROW_RGB_SSSE3(SurfaceFormat::B8G8R8A8)
      default:
        break;
    }

  if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      SWIZZLE_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_ROW_SSE2(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_ROW_SSE2(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      SWIZZLE_ROW_SSE2(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_ROW_SSE2(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8)
      default:
        break;
    }
#endif

#ifdef USE_NEON
  if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      UNPACK_ROW_RGB_NEON(SurfaceFormat::R8G8B8X8)
      UNPACK_ROW_RGB_NEON(SurfaceFormat::R8G8B8A8)
      UNPACK_ROW_RGB_NEON(SurfaceFormat::B8G8R8X8)
      UNPACK_ROW_RGB_NEON(SurfaceFormat::B8G8R8A8)
      SWIZZLE_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_ROW_NEON(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_ROW_NEON(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      SWIZZLE_ROW_NEON(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_ROW_NEON(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8)
      default:
        break;
    }
#endif

  switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8)

    SWIZZLE_ROW_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::A8R8G8B8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::X8R8G8B8)

    SWIZZLE_ROW_FALLBACK(SurfaceFormat::A8R8G8B8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::X8R8G8B8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::A8R8G8B8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::X8R8G8B8, SurfaceFormat::R8G8B8A8)

    SWIZZLE_ROW_OPAQUE(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_ROW_OPAQUE(SurfaceFormat::B8G8R8X8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_ROW_OPAQUE(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_ROW_OPAQUE(SurfaceFormat::R8G8B8X8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_ROW_OPAQUE(SurfaceFormat::A8R8G8B8, SurfaceFormat::X8R8G8B8)
    SWIZZLE_ROW_OPAQUE(SurfaceFormat::X8R8G8B8, SurfaceFormat::A8R8G8B8)

    SWIZZLE_ROW_SWAP(SurfaceFormat::B8G8R8A8, SurfaceFormat::A8R8G8B8)
    SWIZZLE_ROW_SWAP(SurfaceFormat::B8G8R8A8, SurfaceFormat::X8R8G8B8)
    SWIZZLE_ROW_SWAP(SurfaceFormat::B8G8R8X8, SurfaceFormat::X8R8G8B8)
    SWIZZLE_ROW_SWAP(SurfaceFormat::B8G8R8X8, SurfaceFormat::A8R8G8B8)
    SWIZZLE_ROW_SWAP(SurfaceFormat::A8R8G8B8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_ROW_SWAP(SurfaceFormat::A8R8G8B8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_ROW_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_ROW_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8A8)

    SWIZZLE_ROW_SWAP_RGB24(SurfaceFormat::R8G8B8, SurfaceFormat::B8G8R8)
    SWIZZLE_ROW_SWAP_RGB24(SurfaceFormat::B8G8R8, SurfaceFormat::R8G8B8)

    UNPACK_ROW_RGB(SurfaceFormat::R8G8B8X8)
    UNPACK_ROW_RGB(SurfaceFormat::R8G8B8A8)
    UNPACK_ROW_RGB(SurfaceFormat::B8G8R8X8)
    UNPACK_ROW_RGB(SurfaceFormat::B8G8R8A8)
    UNPACK_ROW_RGB_TO_ARGB(SurfaceFormat::A8R8G8B8)
    UNPACK_ROW_RGB_TO_ARGB(SurfaceFormat::X8R8G8B8)

    PACK_ROW_RGB(SurfaceFormat::R8G8B8, PackRowToRGB24)
    PACK_ROW_RGB(SurfaceFormat::B8G8R8, PackRowToRGB24)

    default:
      break;
  }

  if (aSrcFormat == aDstFormat) {
    // Matching formats: a plain per-pixel copy sized by the pixel width.
    switch (BytesPerPixel(aSrcFormat)) {
      case 4:
        return &SwizzleRowCopy<4>;
      case 3:
        return &SwizzleRowCopy<3>;
      default:
        break;
    }
  }

  MOZ_ASSERT_UNREACHABLE("Unsupported swizzle formats");
  return nullptr;
}
1309 } // namespace mozilla