// NOTE(extraction): scrape header from the git blob viewer, preserved as a comment.
// Repository path: gecko.git / gfx / 2d / Swizzle.cpp
// Blob: 03647348f3be49072fea45cf0a8768a68962953c
// Commit message: "no bug - Bumping Firefox l10n changesets r=release a=l10n-bump DONTBUILD CLOSED TREE"
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "Swizzle.h"
8 #include "Logging.h"
9 #include "Orientation.h"
10 #include "Tools.h"
11 #include "mozilla/CheckedInt.h"
12 #include "mozilla/EndianUtils.h"
13 #include "mozilla/UniquePtr.h"
15 #ifdef USE_SSE2
16 # include "mozilla/SSE.h"
17 #endif
19 #ifdef USE_NEON
20 # include "mozilla/arm.h"
21 #endif
23 #include <new>
25 namespace mozilla {
26 namespace gfx {
/**
 * Convenience macros for dispatching to various format combinations.
 */

// Hash the formats to a relatively dense value to optimize jump table
// generation. The first 6 formats in SurfaceFormat are the 32-bit BGRA variants
// and are the most common formats dispatched here. Room is reserved in the
// lowish bits for up to these 6 destination formats. If a destination format is
// >= 6, the 6th bit is set to avoid collisions.
#define FORMAT_KEY(aSrcFormat, aDstFormat) \
  (int(aSrcFormat) * 6 + int(aDstFormat) + (int(int(aDstFormat) >= 6) << 6))

// Expand to a case label that executes a statement and reports success.
#define FORMAT_CASE_EXPR(aSrcFormat, aDstFormat, ...) \
  case FORMAT_KEY(aSrcFormat, aDstFormat):            \
    __VA_ARGS__;                                      \
    return true;

// Dispatch a format pair to a call built by FORMAT_CASE_CALL (defined locally
// at each dispatch site).
#define FORMAT_CASE(aSrcFormat, aDstFormat, ...) \
  FORMAT_CASE_EXPR(aSrcFormat, aDstFormat, FORMAT_CASE_CALL(__VA_ARGS__))

// Dispatch a format pair to return a pointer to a row-conversion function.
#define FORMAT_CASE_ROW(aSrcFormat, aDstFormat, ...) \
  case FORMAT_KEY(aSrcFormat, aDstFormat):           \
    return &__VA_ARGS__;
52 /**
53 * Constexpr functions for analyzing format attributes in templates.
56 // Whether B comes before R in pixel memory layout.
57 static constexpr bool IsBGRFormat(SurfaceFormat aFormat) {
58 return aFormat == SurfaceFormat::B8G8R8A8 ||
59 #if MOZ_LITTLE_ENDIAN()
60 aFormat == SurfaceFormat::R5G6B5_UINT16 ||
61 #endif
62 aFormat == SurfaceFormat::B8G8R8X8 || aFormat == SurfaceFormat::B8G8R8;
65 // Whether the order of B and R need to be swapped to map from src to dst.
66 static constexpr bool ShouldSwapRB(SurfaceFormat aSrcFormat,
67 SurfaceFormat aDstFormat) {
68 return IsBGRFormat(aSrcFormat) != IsBGRFormat(aDstFormat);
71 // The starting byte of the RGB components in pixel memory.
72 static constexpr uint32_t RGBByteIndex(SurfaceFormat aFormat) {
73 return aFormat == SurfaceFormat::A8R8G8B8 ||
74 aFormat == SurfaceFormat::X8R8G8B8
75 ? 1
76 : 0;
79 // The byte of the alpha component, which just comes after RGB.
80 static constexpr uint32_t AlphaByteIndex(SurfaceFormat aFormat) {
81 return (RGBByteIndex(aFormat) + 3) % 4;
84 // The endian-dependent bit shift to access RGB of a UINT32 pixel.
85 static constexpr uint32_t RGBBitShift(SurfaceFormat aFormat) {
86 #if MOZ_LITTLE_ENDIAN()
87 return 8 * RGBByteIndex(aFormat);
88 #else
89 return 8 - 8 * RGBByteIndex(aFormat);
90 #endif
93 // The endian-dependent bit shift to access alpha of a UINT32 pixel.
94 static constexpr uint32_t AlphaBitShift(SurfaceFormat aFormat) {
95 return (RGBBitShift(aFormat) + 24) % 32;
98 // Whether the pixel format should ignore the value of the alpha channel and
99 // treat it as opaque.
100 static constexpr bool IgnoreAlpha(SurfaceFormat aFormat) {
101 return aFormat == SurfaceFormat::B8G8R8X8 ||
102 aFormat == SurfaceFormat::R8G8B8X8 ||
103 aFormat == SurfaceFormat::X8R8G8B8;
106 // Whether to force alpha to opaque to map from src to dst.
107 static constexpr bool ShouldForceOpaque(SurfaceFormat aSrcFormat,
108 SurfaceFormat aDstFormat) {
109 return IgnoreAlpha(aSrcFormat) != IgnoreAlpha(aDstFormat);
#ifdef USE_SSE2
/**
 * SSE2 optimizations
 *
 * The implementations live in a separate translation unit compiled with SSE2
 * enabled; only the declarations and dispatch macros appear here.
 */

template <bool aSwapRB, bool aOpaqueAlpha>
void Premultiply_SSE2(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);

#  define PREMULTIPLY_SSE2(aSrcFormat, aDstFormat)                     \
    FORMAT_CASE(aSrcFormat, aDstFormat,                                \
                Premultiply_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat), \
                                 ShouldForceOpaque(aSrcFormat, aDstFormat)>)

template <bool aSwapRB, bool aOpaqueAlpha>
void PremultiplyRow_SSE2(const uint8_t*, uint8_t*, int32_t);

#  define PREMULTIPLY_ROW_SSE2(aSrcFormat, aDstFormat)            \
    FORMAT_CASE_ROW(                                              \
        aSrcFormat, aDstFormat,                                   \
        PremultiplyRow_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat), \
                            ShouldForceOpaque(aSrcFormat, aDstFormat)>)

template <bool aSwapRB>
void Unpremultiply_SSE2(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);

#  define UNPREMULTIPLY_SSE2(aSrcFormat, aDstFormat) \
    FORMAT_CASE(aSrcFormat, aDstFormat,              \
                Unpremultiply_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat)>)

template <bool aSwapRB>
void UnpremultiplyRow_SSE2(const uint8_t*, uint8_t*, int32_t);

#  define UNPREMULTIPLY_ROW_SSE2(aSrcFormat, aDstFormat) \
    FORMAT_CASE_ROW(                                     \
        aSrcFormat, aDstFormat,                          \
        UnpremultiplyRow_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat)>)

template <bool aSwapRB, bool aOpaqueAlpha>
void Swizzle_SSE2(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);

#  define SWIZZLE_SSE2(aSrcFormat, aDstFormat)                     \
    FORMAT_CASE(aSrcFormat, aDstFormat,                            \
                Swizzle_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat), \
                             ShouldForceOpaque(aSrcFormat, aDstFormat)>)

template <bool aSwapRB, bool aOpaqueAlpha>
void SwizzleRow_SSE2(const uint8_t*, uint8_t*, int32_t);

#  define SWIZZLE_ROW_SSE2(aSrcFormat, aDstFormat)            \
    FORMAT_CASE_ROW(                                          \
        aSrcFormat, aDstFormat,                               \
        SwizzleRow_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat), \
                        ShouldForceOpaque(aSrcFormat, aDstFormat)>)

template <bool aSwapRB>
void UnpackRowRGB24_SSSE3(const uint8_t*, uint8_t*, int32_t);

#  define UNPACK_ROW_RGB_SSSE3(aDstFormat) \
    FORMAT_CASE_ROW(                       \
        SurfaceFormat::R8G8B8, aDstFormat, \
        UnpackRowRGB24_SSSE3<ShouldSwapRB(SurfaceFormat::R8G8B8, aDstFormat)>)

template <bool aSwapRB>
void UnpackRowRGB24_AVX2(const uint8_t*, uint8_t*, int32_t);

#  define UNPACK_ROW_RGB_AVX2(aDstFormat)  \
    FORMAT_CASE_ROW(                       \
        SurfaceFormat::R8G8B8, aDstFormat, \
        UnpackRowRGB24_AVX2<ShouldSwapRB(SurfaceFormat::R8G8B8, aDstFormat)>)

#endif
#ifdef USE_NEON
/**
 * ARM NEON optimizations
 *
 * Implementations live in a NEON-enabled translation unit; only declarations
 * and dispatch macros appear here.
 */

template <bool aSwapRB, bool aOpaqueAlpha>
void Premultiply_NEON(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);

#  define PREMULTIPLY_NEON(aSrcFormat, aDstFormat)                     \
    FORMAT_CASE(aSrcFormat, aDstFormat,                                \
                Premultiply_NEON<ShouldSwapRB(aSrcFormat, aDstFormat), \
                                 ShouldForceOpaque(aSrcFormat, aDstFormat)>)

template <bool aSwapRB, bool aOpaqueAlpha>
void PremultiplyRow_NEON(const uint8_t*, uint8_t*, int32_t);

#  define PREMULTIPLY_ROW_NEON(aSrcFormat, aDstFormat)            \
    FORMAT_CASE_ROW(                                              \
        aSrcFormat, aDstFormat,                                   \
        PremultiplyRow_NEON<ShouldSwapRB(aSrcFormat, aDstFormat), \
                            ShouldForceOpaque(aSrcFormat, aDstFormat)>)

template <bool aSwapRB>
void Unpremultiply_NEON(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);

#  define UNPREMULTIPLY_NEON(aSrcFormat, aDstFormat) \
    FORMAT_CASE(aSrcFormat, aDstFormat,              \
                Unpremultiply_NEON<ShouldSwapRB(aSrcFormat, aDstFormat)>)

template <bool aSwapRB>
void UnpremultiplyRow_NEON(const uint8_t*, uint8_t*, int32_t);

#  define UNPREMULTIPLY_ROW_NEON(aSrcFormat, aDstFormat) \
    FORMAT_CASE_ROW(                                     \
        aSrcFormat, aDstFormat,                          \
        UnpremultiplyRow_NEON<ShouldSwapRB(aSrcFormat, aDstFormat)>)

template <bool aSwapRB, bool aOpaqueAlpha>
void Swizzle_NEON(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);

#  define SWIZZLE_NEON(aSrcFormat, aDstFormat)                     \
    FORMAT_CASE(aSrcFormat, aDstFormat,                            \
                Swizzle_NEON<ShouldSwapRB(aSrcFormat, aDstFormat), \
                             ShouldForceOpaque(aSrcFormat, aDstFormat)>)

template <bool aSwapRB, bool aOpaqueAlpha>
void SwizzleRow_NEON(const uint8_t*, uint8_t*, int32_t);

#  define SWIZZLE_ROW_NEON(aSrcFormat, aDstFormat)            \
    FORMAT_CASE_ROW(                                          \
        aSrcFormat, aDstFormat,                               \
        SwizzleRow_NEON<ShouldSwapRB(aSrcFormat, aDstFormat), \
                        ShouldForceOpaque(aSrcFormat, aDstFormat)>)

template <bool aSwapRB>
void UnpackRowRGB24_NEON(const uint8_t*, uint8_t*, int32_t);

#  define UNPACK_ROW_RGB_NEON(aDstFormat)  \
    FORMAT_CASE_ROW(                       \
        SurfaceFormat::R8G8B8, aDstFormat, \
        UnpackRowRGB24_NEON<ShouldSwapRB(SurfaceFormat::R8G8B8, aDstFormat)>)
#endif
/**
 * Premultiplying
 */

// Fallback premultiply implementation that uses splayed pixel math to reduce
// the multiplications used. That is, the R and B components are isolated from
// the G and A components, which then can be multiplied as if they were two
// 2-component vectors. Otherwise, an approximation of divide-by-255 is used
// which is faster than an actual division. These optimizations are also used
// for the SSE2 and NEON implementations.
//
// Both pointers are advanced past the processed chunk (passed by reference)
// so that callers can resume at the next row.
template <bool aSwapRB, bool aOpaqueAlpha, uint32_t aSrcRGBShift,
          uint32_t aSrcAShift, uint32_t aDstRGBShift, uint32_t aDstAShift>
static void PremultiplyChunkFallback(const uint8_t*& aSrc, uint8_t*& aDst,
                                     int32_t aLength) {
  const uint8_t* end = aSrc + 4 * aLength;
  do {
    // Load and process 1 entire pixel at a time.
    uint32_t color = *reinterpret_cast<const uint32_t*>(aSrc);

    uint32_t a = aSrcAShift ? color >> aSrcAShift : color & 0xFF;

    // Isolate the R and B components.
    uint32_t rb = (color >> aSrcRGBShift) & 0x00FF00FF;
    // Swap the order of R and B if necessary.
    if (aSwapRB) {
      rb = (rb >> 16) | (rb << 16);
    }
    // Approximate the multiply by alpha and divide by 255 which is
    // essentially:
    // c = c*a + 255; c = (c + (c >> 8)) >> 8;
    // However, we omit the final >> 8 to fold it with the final shift into
    // place depending on desired output format.
    rb = rb * a + 0x00FF00FF;
    rb = (rb + ((rb >> 8) & 0x00FF00FF)) & 0xFF00FF00;

    // Use same approximation as above, but G is shifted 8 bits left.
    // Alpha is left out and handled separately.
    uint32_t g = color & (0xFF00 << aSrcRGBShift);
    g = g * a + (0xFF00 << aSrcRGBShift);
    g = (g + (g >> 8)) & (0xFF0000 << aSrcRGBShift);

    // The above math leaves RGB shifted left by 8 bits.
    // Shift them right if required for the output format.
    // then combine them back together to produce output pixel.
    // Add the alpha back on if the output format is not opaque.
    *reinterpret_cast<uint32_t*>(aDst) =
        (rb >> (8 - aDstRGBShift)) | (g >> (8 + aSrcRGBShift - aDstRGBShift)) |
        (aOpaqueAlpha ? 0xFF << aDstAShift : a << aDstAShift);

    aSrc += 4;
    aDst += 4;
  } while (aSrc < end);
}
301 template <bool aSwapRB, bool aOpaqueAlpha, uint32_t aSrcRGBShift,
302 uint32_t aSrcAShift, uint32_t aDstRGBShift, uint32_t aDstAShift>
303 static void PremultiplyRowFallback(const uint8_t* aSrc, uint8_t* aDst,
304 int32_t aLength) {
305 PremultiplyChunkFallback<aSwapRB, aOpaqueAlpha, aSrcRGBShift, aSrcAShift,
306 aDstRGBShift, aDstAShift>(aSrc, aDst, aLength);
309 template <bool aSwapRB, bool aOpaqueAlpha, uint32_t aSrcRGBShift,
310 uint32_t aSrcAShift, uint32_t aDstRGBShift, uint32_t aDstAShift>
311 static void PremultiplyFallback(const uint8_t* aSrc, int32_t aSrcGap,
312 uint8_t* aDst, int32_t aDstGap, IntSize aSize) {
313 for (int32_t height = aSize.height; height > 0; height--) {
314 PremultiplyChunkFallback<aSwapRB, aOpaqueAlpha, aSrcRGBShift, aSrcAShift,
315 aDstRGBShift, aDstAShift>(aSrc, aDst, aSize.width);
316 aSrc += aSrcGap;
317 aDst += aDstGap;
// Dispatch cases instantiating the fallback premultiply for a format pair.
#define PREMULTIPLY_FALLBACK_CASE(aSrcFormat, aDstFormat)                     \
  FORMAT_CASE(                                                                \
      aSrcFormat, aDstFormat,                                                 \
      PremultiplyFallback<ShouldSwapRB(aSrcFormat, aDstFormat),               \
                          ShouldForceOpaque(aSrcFormat, aDstFormat),          \
                          RGBBitShift(aSrcFormat), AlphaBitShift(aSrcFormat), \
                          RGBBitShift(aDstFormat), AlphaBitShift(aDstFormat)>)

#define PREMULTIPLY_FALLBACK(aSrcFormat)                         \
  PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8A8) \
  PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8X8) \
  PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8A8) \
  PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8X8) \
  PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::A8R8G8B8) \
  PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::X8R8G8B8)

// Row-function variants of the above dispatch cases.
#define PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, aDstFormat)             \
  FORMAT_CASE_ROW(aSrcFormat, aDstFormat,                                 \
                  PremultiplyRowFallback<                                 \
                      ShouldSwapRB(aSrcFormat, aDstFormat),               \
                      ShouldForceOpaque(aSrcFormat, aDstFormat),          \
                      RGBBitShift(aSrcFormat), AlphaBitShift(aSrcFormat), \
                      RGBBitShift(aDstFormat), AlphaBitShift(aDstFormat)>)

#define PREMULTIPLY_ROW_FALLBACK(aSrcFormat)                         \
  PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8A8) \
  PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8X8) \
  PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8A8) \
  PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8X8) \
  PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::A8R8G8B8) \
  PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::X8R8G8B8)
353 // If rows are tightly packed, and the size of the total area will fit within
354 // the precision range of a single row, then process all the data as if it was
355 // a single row.
356 static inline IntSize CollapseSize(const IntSize& aSize, int32_t aSrcStride,
357 int32_t aDstStride) {
358 if (aSrcStride == aDstStride && (aSrcStride & 3) == 0 &&
359 aSrcStride / 4 == aSize.width) {
360 CheckedInt32 area = CheckedInt32(aSize.width) * CheckedInt32(aSize.height);
361 if (area.isValid()) {
362 return IntSize(area.value(), 1);
365 return aSize;
368 static inline int32_t GetStrideGap(int32_t aWidth, SurfaceFormat aFormat,
369 int32_t aStride) {
370 CheckedInt32 used = CheckedInt32(aWidth) * BytesPerPixel(aFormat);
371 if (!used.isValid() || used.value() < 0) {
372 return -1;
374 return aStride - used.value();
377 bool PremultiplyData(const uint8_t* aSrc, int32_t aSrcStride,
378 SurfaceFormat aSrcFormat, uint8_t* aDst,
379 int32_t aDstStride, SurfaceFormat aDstFormat,
380 const IntSize& aSize) {
381 if (aSize.IsEmpty()) {
382 return true;
384 IntSize size = CollapseSize(aSize, aSrcStride, aDstStride);
385 // Find gap from end of row to the start of the next row.
386 int32_t srcGap = GetStrideGap(aSize.width, aSrcFormat, aSrcStride);
387 int32_t dstGap = GetStrideGap(aSize.width, aDstFormat, aDstStride);
388 MOZ_ASSERT(srcGap >= 0 && dstGap >= 0);
389 if (srcGap < 0 || dstGap < 0) {
390 return false;
393 #define FORMAT_CASE_CALL(...) __VA_ARGS__(aSrc, srcGap, aDst, dstGap, size)
395 #ifdef USE_SSE2
396 if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
397 PREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8)
398 PREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8X8)
399 PREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
400 PREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
401 PREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8)
402 PREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8X8)
403 PREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
404 PREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
405 default:
406 break;
408 #endif
410 #ifdef USE_NEON
411 if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
412 PREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8)
413 PREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8X8)
414 PREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
415 PREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
416 PREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8)
417 PREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8X8)
418 PREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
419 PREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
420 default:
421 break;
423 #endif
425 switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
426 PREMULTIPLY_FALLBACK(SurfaceFormat::B8G8R8A8)
427 PREMULTIPLY_FALLBACK(SurfaceFormat::R8G8B8A8)
428 PREMULTIPLY_FALLBACK(SurfaceFormat::A8R8G8B8)
429 default:
430 break;
433 #undef FORMAT_CASE_CALL
435 MOZ_ASSERT(false, "Unsupported premultiply formats");
436 return false;
439 SwizzleRowFn PremultiplyRow(SurfaceFormat aSrcFormat,
440 SurfaceFormat aDstFormat) {
441 #ifdef USE_SSE2
442 if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
443 PREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8)
444 PREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8X8)
445 PREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
446 PREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
447 PREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8)
448 PREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8X8)
449 PREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
450 PREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
451 default:
452 break;
454 #endif
456 #ifdef USE_NEON
457 if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
458 PREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8)
459 PREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8X8)
460 PREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
461 PREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
462 PREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8)
463 PREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8X8)
464 PREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
465 PREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
466 default:
467 break;
469 #endif
471 switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
472 PREMULTIPLY_ROW_FALLBACK(SurfaceFormat::B8G8R8A8)
473 PREMULTIPLY_ROW_FALLBACK(SurfaceFormat::R8G8B8A8)
474 PREMULTIPLY_ROW_FALLBACK(SurfaceFormat::A8R8G8B8)
475 default:
476 break;
479 MOZ_ASSERT_UNREACHABLE("Unsupported premultiply formats");
480 return nullptr;
484 * Unpremultiplying
487 // Generate a table of 8.16 fixed-point reciprocals representing 1/alpha.
488 #define UNPREMULQ(x) (0xFF00FFU / (x))
489 #define UNPREMULQ_2(x) UNPREMULQ(x), UNPREMULQ((x) + 1)
490 #define UNPREMULQ_4(x) UNPREMULQ_2(x), UNPREMULQ_2((x) + 2)
491 #define UNPREMULQ_8(x) UNPREMULQ_4(x), UNPREMULQ_4((x) + 4)
492 #define UNPREMULQ_16(x) UNPREMULQ_8(x), UNPREMULQ_8((x) + 8)
493 #define UNPREMULQ_32(x) UNPREMULQ_16(x), UNPREMULQ_16((x) + 16)
494 static const uint32_t sUnpremultiplyTable[256] = {0,
495 UNPREMULQ(1),
496 UNPREMULQ_2(2),
497 UNPREMULQ_4(4),
498 UNPREMULQ_8(8),
499 UNPREMULQ_16(16),
500 UNPREMULQ_32(32),
501 UNPREMULQ_32(64),
502 UNPREMULQ_32(96),
503 UNPREMULQ_32(128),
504 UNPREMULQ_32(160),
505 UNPREMULQ_32(192),
506 UNPREMULQ_32(224)};
508 // Fallback unpremultiply implementation that uses 8.16 fixed-point reciprocal
509 // math to eliminate any division by the alpha component. This optimization is
510 // used for the SSE2 and NEON implementations, with some adaptations. This
511 // implementation also accesses color components using individual byte accesses
512 // as this profiles faster than accessing the pixel as a uint32_t and
513 // shifting/masking to access components.
514 template <bool aSwapRB, uint32_t aSrcRGBIndex, uint32_t aSrcAIndex,
515 uint32_t aDstRGBIndex, uint32_t aDstAIndex>
516 static void UnpremultiplyChunkFallback(const uint8_t*& aSrc, uint8_t*& aDst,
517 int32_t aLength) {
518 const uint8_t* end = aSrc + 4 * aLength;
519 do {
520 uint8_t r = aSrc[aSrcRGBIndex + (aSwapRB ? 2 : 0)];
521 uint8_t g = aSrc[aSrcRGBIndex + 1];
522 uint8_t b = aSrc[aSrcRGBIndex + (aSwapRB ? 0 : 2)];
523 uint8_t a = aSrc[aSrcAIndex];
525 // Access the 8.16 reciprocal from the table based on alpha. Multiply by
526 // the reciprocal and shift off the fraction bits to approximate the
527 // division by alpha.
528 uint32_t q = sUnpremultiplyTable[a];
529 aDst[aDstRGBIndex + 0] = (r * q) >> 16;
530 aDst[aDstRGBIndex + 1] = (g * q) >> 16;
531 aDst[aDstRGBIndex + 2] = (b * q) >> 16;
532 aDst[aDstAIndex] = a;
534 aSrc += 4;
535 aDst += 4;
536 } while (aSrc < end);
539 template <bool aSwapRB, uint32_t aSrcRGBIndex, uint32_t aSrcAIndex,
540 uint32_t aDstRGBIndex, uint32_t aDstAIndex>
541 static void UnpremultiplyRowFallback(const uint8_t* aSrc, uint8_t* aDst,
542 int32_t aLength) {
543 UnpremultiplyChunkFallback<aSwapRB, aSrcRGBIndex, aSrcAIndex, aDstRGBIndex,
544 aDstAIndex>(aSrc, aDst, aLength);
547 template <bool aSwapRB, uint32_t aSrcRGBIndex, uint32_t aSrcAIndex,
548 uint32_t aDstRGBIndex, uint32_t aDstAIndex>
549 static void UnpremultiplyFallback(const uint8_t* aSrc, int32_t aSrcGap,
550 uint8_t* aDst, int32_t aDstGap,
551 IntSize aSize) {
552 for (int32_t height = aSize.height; height > 0; height--) {
553 UnpremultiplyChunkFallback<aSwapRB, aSrcRGBIndex, aSrcAIndex, aDstRGBIndex,
554 aDstAIndex>(aSrc, aDst, aSize.width);
555 aSrc += aSrcGap;
556 aDst += aDstGap;
// Dispatch cases instantiating the fallback unpremultiply for a format pair.
#define UNPREMULTIPLY_FALLBACK_CASE(aSrcFormat, aDstFormat)             \
  FORMAT_CASE(aSrcFormat, aDstFormat,                                   \
              UnpremultiplyFallback<                                    \
                  ShouldSwapRB(aSrcFormat, aDstFormat),                 \
                  RGBByteIndex(aSrcFormat), AlphaByteIndex(aSrcFormat), \
                  RGBByteIndex(aDstFormat), AlphaByteIndex(aDstFormat)>)

#define UNPREMULTIPLY_FALLBACK(aSrcFormat)                         \
  UNPREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8A8) \
  UNPREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8A8) \
  UNPREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::A8R8G8B8)

// Row-function variants of the above dispatch cases.
#define UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, aDstFormat)             \
  FORMAT_CASE_ROW(aSrcFormat, aDstFormat,                                   \
                  UnpremultiplyRowFallback<                                 \
                      ShouldSwapRB(aSrcFormat, aDstFormat),                 \
                      RGBByteIndex(aSrcFormat), AlphaByteIndex(aSrcFormat), \
                      RGBByteIndex(aDstFormat), AlphaByteIndex(aDstFormat)>)

#define UNPREMULTIPLY_ROW_FALLBACK(aSrcFormat)                         \
  UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8A8) \
  UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8A8) \
  UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::A8R8G8B8)
584 bool UnpremultiplyData(const uint8_t* aSrc, int32_t aSrcStride,
585 SurfaceFormat aSrcFormat, uint8_t* aDst,
586 int32_t aDstStride, SurfaceFormat aDstFormat,
587 const IntSize& aSize) {
588 if (aSize.IsEmpty()) {
589 return true;
591 IntSize size = CollapseSize(aSize, aSrcStride, aDstStride);
592 // Find gap from end of row to the start of the next row.
593 int32_t srcGap = GetStrideGap(aSize.width, aSrcFormat, aSrcStride);
594 int32_t dstGap = GetStrideGap(aSize.width, aDstFormat, aDstStride);
595 MOZ_ASSERT(srcGap >= 0 && dstGap >= 0);
596 if (srcGap < 0 || dstGap < 0) {
597 return false;
600 #define FORMAT_CASE_CALL(...) __VA_ARGS__(aSrc, srcGap, aDst, dstGap, size)
602 #ifdef USE_SSE2
603 if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
604 UNPREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8)
605 UNPREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
606 UNPREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8)
607 UNPREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
608 default:
609 break;
611 #endif
613 #ifdef USE_NEON
614 if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
615 UNPREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8)
616 UNPREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
617 UNPREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8)
618 UNPREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
619 default:
620 break;
622 #endif
624 switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
625 UNPREMULTIPLY_FALLBACK(SurfaceFormat::B8G8R8A8)
626 UNPREMULTIPLY_FALLBACK(SurfaceFormat::R8G8B8A8)
627 UNPREMULTIPLY_FALLBACK(SurfaceFormat::A8R8G8B8)
628 default:
629 break;
632 #undef FORMAT_CASE_CALL
634 MOZ_ASSERT(false, "Unsupported unpremultiply formats");
635 return false;
638 SwizzleRowFn UnpremultiplyRow(SurfaceFormat aSrcFormat,
639 SurfaceFormat aDstFormat) {
640 #ifdef USE_SSE2
641 if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
642 UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8)
643 UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
644 UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8)
645 UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
646 default:
647 break;
649 #endif
651 #ifdef USE_NEON
652 if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
653 UNPREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8)
654 UNPREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
655 UNPREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8)
656 UNPREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
657 default:
658 break;
660 #endif
662 switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
663 UNPREMULTIPLY_ROW_FALLBACK(SurfaceFormat::B8G8R8A8)
664 UNPREMULTIPLY_ROW_FALLBACK(SurfaceFormat::R8G8B8A8)
665 UNPREMULTIPLY_ROW_FALLBACK(SurfaceFormat::A8R8G8B8)
666 default:
667 break;
670 MOZ_ASSERT_UNREACHABLE("Unsupported premultiply formats");
671 return nullptr;
/**
 * Swizzling
 */

// Fallback swizzle implementation that uses shifting and masking to reorder
// pixels. Both pointers are advanced past the processed chunk (passed by
// reference).
template <bool aSwapRB, bool aOpaqueAlpha, uint32_t aSrcRGBShift,
          uint32_t aSrcAShift, uint32_t aDstRGBShift, uint32_t aDstAShift>
static void SwizzleChunkFallback(const uint8_t*& aSrc, uint8_t*& aDst,
                                 int32_t aLength) {
  const uint8_t* end = aSrc + 4 * aLength;
  do {
    uint32_t rgba = *reinterpret_cast<const uint32_t*>(aSrc);

    if (aSwapRB) {
      // Handle R and B swaps by exchanging words and masking.
      uint32_t rb =
          ((rgba << 16) | (rgba >> 16)) & (0x00FF00FF << aSrcRGBShift);
      uint32_t ga = rgba & ((0xFF << aSrcAShift) | (0xFF00 << aSrcRGBShift));
      rgba = rb | ga;
    }

    // If src and dst shifts differ, rotate left or right to move RGB into
    // place, i.e. ARGB -> RGBA or RGBA -> ARGB.
    if (aDstRGBShift > aSrcRGBShift) {
      rgba = (rgba << 8) | (aOpaqueAlpha ? 0x000000FF : rgba >> 24);
    } else if (aSrcRGBShift > aDstRGBShift) {
      rgba = (rgba >> 8) | (aOpaqueAlpha ? 0xFF000000 : rgba << 24);
    } else if (aOpaqueAlpha) {
      rgba |= 0xFF << aDstAShift;
    }

    *reinterpret_cast<uint32_t*>(aDst) = rgba;

    aSrc += 4;
    aDst += 4;
  } while (aSrc < end);
}
713 template <bool aSwapRB, bool aOpaqueAlpha, uint32_t aSrcRGBShift,
714 uint32_t aSrcAShift, uint32_t aDstRGBShift, uint32_t aDstAShift>
715 static void SwizzleRowFallback(const uint8_t* aSrc, uint8_t* aDst,
716 int32_t aLength) {
717 SwizzleChunkFallback<aSwapRB, aOpaqueAlpha, aSrcRGBShift, aSrcAShift,
718 aDstRGBShift, aDstAShift>(aSrc, aDst, aLength);
721 template <bool aSwapRB, bool aOpaqueAlpha, uint32_t aSrcRGBShift,
722 uint32_t aSrcAShift, uint32_t aDstRGBShift, uint32_t aDstAShift>
723 static void SwizzleFallback(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
724 int32_t aDstGap, IntSize aSize) {
725 for (int32_t height = aSize.height; height > 0; height--) {
726 SwizzleChunkFallback<aSwapRB, aOpaqueAlpha, aSrcRGBShift, aSrcAShift,
727 aDstRGBShift, aDstAShift>(aSrc, aDst, aSize.width);
728 aSrc += aSrcGap;
729 aDst += aDstGap;
// Dispatch cases instantiating the fallback swizzle for a format pair.
#define SWIZZLE_FALLBACK(aSrcFormat, aDstFormat)                          \
  FORMAT_CASE(                                                            \
      aSrcFormat, aDstFormat,                                             \
      SwizzleFallback<ShouldSwapRB(aSrcFormat, aDstFormat),               \
                      ShouldForceOpaque(aSrcFormat, aDstFormat),          \
                      RGBBitShift(aSrcFormat), AlphaBitShift(aSrcFormat), \
                      RGBBitShift(aDstFormat), AlphaBitShift(aDstFormat)>)

#define SWIZZLE_ROW_FALLBACK(aSrcFormat, aDstFormat)                         \
  FORMAT_CASE_ROW(                                                           \
      aSrcFormat, aDstFormat,                                                \
      SwizzleRowFallback<ShouldSwapRB(aSrcFormat, aDstFormat),               \
                         ShouldForceOpaque(aSrcFormat, aDstFormat),          \
                         RGBBitShift(aSrcFormat), AlphaBitShift(aSrcFormat), \
                         RGBBitShift(aDstFormat), AlphaBitShift(aDstFormat)>)
// Fast-path for matching formats: a row is just a memcpy (skipped entirely
// for in-place conversion).
template <int32_t aBytesPerPixel>
static void SwizzleRowCopy(const uint8_t* aSrc, uint8_t* aDst,
                           int32_t aLength) {
  if (aSrc != aDst) {
    memcpy(aDst, aSrc, aLength * aBytesPerPixel);
  }
}
758 // Fast-path for matching formats.
759 static void SwizzleCopy(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
760 int32_t aDstGap, IntSize aSize, int32_t aBPP) {
761 if (aSrc != aDst) {
762 int32_t rowLength = aBPP * aSize.width;
763 for (int32_t height = aSize.height; height > 0; height--) {
764 memcpy(aDst, aSrc, rowLength);
765 aSrc += rowLength + aSrcGap;
766 aDst += rowLength + aDstGap;
771 // Fast-path for conversions that swap all bytes.
772 template <bool aOpaqueAlpha, uint32_t aSrcAShift, uint32_t aDstAShift>
773 static void SwizzleChunkSwap(const uint8_t*& aSrc, uint8_t*& aDst,
774 int32_t aLength) {
775 const uint8_t* end = aSrc + 4 * aLength;
776 do {
777 // Use an endian swap to move the bytes, i.e. BGRA -> ARGB.
778 uint32_t rgba = *reinterpret_cast<const uint32_t*>(aSrc);
779 #if MOZ_LITTLE_ENDIAN()
780 rgba = NativeEndian::swapToBigEndian(rgba);
781 #else
782 rgba = NativeEndian::swapToLittleEndian(rgba);
783 #endif
784 if (aOpaqueAlpha) {
785 rgba |= 0xFF << aDstAShift;
787 *reinterpret_cast<uint32_t*>(aDst) = rgba;
788 aSrc += 4;
789 aDst += 4;
790 } while (aSrc < end);
793 template <bool aOpaqueAlpha, uint32_t aSrcAShift, uint32_t aDstAShift>
794 static void SwizzleRowSwap(const uint8_t* aSrc, uint8_t* aDst,
795 int32_t aLength) {
796 SwizzleChunkSwap<aOpaqueAlpha, aSrcAShift, aDstAShift>(aSrc, aDst, aLength);
799 template <bool aOpaqueAlpha, uint32_t aSrcAShift, uint32_t aDstAShift>
800 static void SwizzleSwap(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
801 int32_t aDstGap, IntSize aSize) {
802 for (int32_t height = aSize.height; height > 0; height--) {
803 SwizzleChunkSwap<aOpaqueAlpha, aSrcAShift, aDstAShift>(aSrc, aDst,
804 aSize.width);
805 aSrc += aSrcGap;
806 aDst += aDstGap;
// Dispatch cases for the full-byte-swap fast path.
#define SWIZZLE_SWAP(aSrcFormat, aDstFormat)                    \
  FORMAT_CASE(                                                  \
      aSrcFormat, aDstFormat,                                   \
      SwizzleSwap<ShouldForceOpaque(aSrcFormat, aDstFormat),    \
                  AlphaBitShift(aSrcFormat), AlphaBitShift(aDstFormat)>)

#define SWIZZLE_ROW_SWAP(aSrcFormat, aDstFormat)                \
  FORMAT_CASE_ROW(                                              \
      aSrcFormat, aDstFormat,                                   \
      SwizzleRowSwap<ShouldForceOpaque(aSrcFormat, aDstFormat), \
                     AlphaBitShift(aSrcFormat), AlphaBitShift(aDstFormat)>)
// Swap R and B of packed 24-bit RGB pixels (RGB <-> BGR). Both pointers are
// advanced past the processed chunk (passed by reference).
static void SwizzleChunkSwapRGB24(const uint8_t*& aSrc, uint8_t*& aDst,
                                  int32_t aLength) {
  const uint8_t* end = aSrc + 3 * aLength;
  do {
    uint8_t r = aSrc[0];
    uint8_t g = aSrc[1];
    uint8_t b = aSrc[2];
    aDst[0] = b;
    aDst[1] = g;
    aDst[2] = r;
    aSrc += 3;
    aDst += 3;
  } while (aSrc < end);
}
837 static void SwizzleRowSwapRGB24(const uint8_t* aSrc, uint8_t* aDst,
838 int32_t aLength) {
839 SwizzleChunkSwapRGB24(aSrc, aDst, aLength);
842 static void SwizzleSwapRGB24(const uint8_t* aSrc, int32_t aSrcGap,
843 uint8_t* aDst, int32_t aDstGap, IntSize aSize) {
844 for (int32_t height = aSize.height; height > 0; height--) {
845 SwizzleChunkSwapRGB24(aSrc, aDst, aSize.width);
846 aSrc += aSrcGap;
847 aDst += aDstGap;
// Dispatch cases for the 24-bit R/B swap fast path.
#define SWIZZLE_SWAP_RGB24(aSrcFormat, aDstFormat) \
  FORMAT_CASE(aSrcFormat, aDstFormat, SwizzleSwapRGB24)

#define SWIZZLE_ROW_SWAP_RGB24(aSrcFormat, aDstFormat) \
  FORMAT_CASE_ROW(aSrcFormat, aDstFormat, SwizzleRowSwapRGB24)
// Fast-path for conversions that force alpha to opaque, modifying the buffer
// in place. The pointer is advanced past the processed chunk (passed by
// reference).
template <uint32_t aDstAShift>
static void SwizzleChunkOpaqueUpdate(uint8_t*& aBuffer, int32_t aLength) {
  const uint8_t* end = aBuffer + 4 * aLength;
  do {
    uint32_t rgba = *reinterpret_cast<const uint32_t*>(aBuffer);
    // Just add on the alpha bits to the source.
    rgba |= 0xFF << aDstAShift;
    *reinterpret_cast<uint32_t*>(aBuffer) = rgba;
    aBuffer += 4;
  } while (aBuffer < end);
}
// Copies a run of 32-bit pixels while forcing the alpha channel to fully
// opaque, advancing the source pointer past the processed bytes.
template <uint32_t aDstAShift>
static void SwizzleChunkOpaqueCopy(const uint8_t*& aSrc, uint8_t* aDst,
                                   int32_t aLength) {
  const uint8_t* end = aSrc + 4 * aLength;
  do {
    uint32_t rgba = *reinterpret_cast<const uint32_t*>(aSrc);
    // Just add on the alpha bits to the source. The constant is unsigned so
    // that shifting into the top byte (aDstAShift == 24) stays in unsigned
    // arithmetic instead of overflowing the promoted signed int.
    rgba |= 0xFFU << aDstAShift;
    *reinterpret_cast<uint32_t*>(aDst) = rgba;
    aSrc += 4;
    aDst += 4;
  } while (aSrc < end);
}
884 template <uint32_t aDstAShift>
885 static void SwizzleRowOpaque(const uint8_t* aSrc, uint8_t* aDst,
886 int32_t aLength) {
887 if (aSrc == aDst) {
888 SwizzleChunkOpaqueUpdate<aDstAShift>(aDst, aLength);
889 } else {
890 SwizzleChunkOpaqueCopy<aDstAShift>(aSrc, aDst, aLength);
894 template <uint32_t aDstAShift>
895 static void SwizzleOpaque(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
896 int32_t aDstGap, IntSize aSize) {
897 if (aSrc == aDst) {
898 // Modifying in-place, so just write out the alpha.
899 for (int32_t height = aSize.height; height > 0; height--) {
900 SwizzleChunkOpaqueUpdate<aDstAShift>(aDst, aSize.width);
901 aDst += aDstGap;
903 } else {
904 for (int32_t height = aSize.height; height > 0; height--) {
905 SwizzleChunkOpaqueCopy<aDstAShift>(aSrc, aDst, aSize.width);
906 aSrc += aSrcGap;
907 aDst += aDstGap;
// Dispatch case for whole-surface alpha forcing between layouts that differ
// only in whether alpha is meaningful.
#define SWIZZLE_OPAQUE(aSrcFormat, aDstFormat) \
  FORMAT_CASE(aSrcFormat, aDstFormat, SwizzleOpaque<AlphaBitShift(aDstFormat)>)

// Dispatch case returning the per-row alpha-forcing variant.
#define SWIZZLE_ROW_OPAQUE(aSrcFormat, aDstFormat) \
  FORMAT_CASE_ROW(aSrcFormat, aDstFormat,          \
                  SwizzleRowOpaque<AlphaBitShift(aDstFormat)>)
919 // Packing of 32-bit formats to RGB565.
920 template <bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex>
921 static void PackToRGB565(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
922 int32_t aDstGap, IntSize aSize) {
923 for (int32_t height = aSize.height; height > 0; height--) {
924 const uint8_t* end = aSrc + 4 * aSize.width;
925 do {
926 uint32_t rgba = *reinterpret_cast<const uint32_t*>(aSrc);
928 // Isolate the R, G, and B components and shift to final endian-dependent
929 // locations.
930 uint16_t rgb565;
931 if (aSwapRB) {
932 rgb565 = ((rgba & (0xF8 << aSrcRGBShift)) << (8 - aSrcRGBShift)) |
933 ((rgba & (0xFC00 << aSrcRGBShift)) >> (5 + aSrcRGBShift)) |
934 ((rgba & (0xF80000 << aSrcRGBShift)) >> (19 + aSrcRGBShift));
935 } else {
936 rgb565 = ((rgba & (0xF8 << aSrcRGBShift)) >> (3 + aSrcRGBShift)) |
937 ((rgba & (0xFC00 << aSrcRGBShift)) >> (5 + aSrcRGBShift)) |
938 ((rgba & (0xF80000 << aSrcRGBShift)) >> (8 + aSrcRGBShift));
941 *reinterpret_cast<uint16_t*>(aDst) = rgb565;
943 aSrc += 4;
944 aDst += 2;
945 } while (aSrc < end);
947 aSrc += aSrcGap;
948 aDst += aDstGap;
// Packing of 32-bit formats to 24-bit formats.
//
// Packs a run of 32-bit pixels down to tightly-packed 24-bit pixels,
// optionally swapping R and B, advancing both pointers. All components are
// read before any byte is written so in-place packing (dst trailing src)
// stays correct.
template <bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex>
static void PackChunkToRGB24(const uint8_t*& aSrc, uint8_t*& aDst,
                             int32_t aLength) {
  constexpr uint32_t redOffset = aSrcRGBIndex + (aSwapRB ? 2 : 0);
  constexpr uint32_t blueOffset = aSrcRGBIndex + (aSwapRB ? 0 : 2);
  const uint8_t* last = aSrc + 4 * aLength;
  do {
    uint8_t red = aSrc[redOffset];
    uint8_t green = aSrc[aSrcRGBIndex + 1];
    uint8_t blue = aSrc[blueOffset];

    aDst[0] = red;
    aDst[1] = green;
    aDst[2] = blue;

    aSrc += 4;
    aDst += 3;
  } while (aSrc < last);
}
971 template <bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex>
972 static void PackRowToRGB24(const uint8_t* aSrc, uint8_t* aDst,
973 int32_t aLength) {
974 PackChunkToRGB24<aSwapRB, aSrcRGBShift, aSrcRGBIndex>(aSrc, aDst, aLength);
977 template <bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex>
978 static void PackToRGB24(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
979 int32_t aDstGap, IntSize aSize) {
980 for (int32_t height = aSize.height; height > 0; height--) {
981 PackChunkToRGB24<aSwapRB, aSrcRGBShift, aSrcRGBIndex>(aSrc, aDst,
982 aSize.width);
983 aSrc += aSrcGap;
984 aDst += aDstGap;
// Dispatch case packing one 32-bit source format down to aDstFormat,
// instantiated with the source's R/B order, RGB bit shift and byte index.
#define PACK_RGB_CASE(aSrcFormat, aDstFormat, aPackFunc)      \
  FORMAT_CASE(aSrcFormat, aDstFormat,                         \
              aPackFunc<ShouldSwapRB(aSrcFormat, aDstFormat), \
                        RGBBitShift(aSrcFormat), RGBByteIndex(aSrcFormat)>)

// Expands PACK_RGB_CASE for every supported 32-bit source format.
#define PACK_RGB(aDstFormat, aPackFunc)                      \
  PACK_RGB_CASE(SurfaceFormat::B8G8R8A8, aDstFormat, aPackFunc) \
  PACK_RGB_CASE(SurfaceFormat::B8G8R8X8, aDstFormat, aPackFunc) \
  PACK_RGB_CASE(SurfaceFormat::R8G8B8A8, aDstFormat, aPackFunc) \
  PACK_RGB_CASE(SurfaceFormat::R8G8B8X8, aDstFormat, aPackFunc) \
  PACK_RGB_CASE(SurfaceFormat::A8R8G8B8, aDstFormat, aPackFunc) \
  PACK_RGB_CASE(SurfaceFormat::X8R8G8B8, aDstFormat, aPackFunc)

// Per-row variant of PACK_RGB_CASE, returning the row packer function.
#define PACK_ROW_RGB_CASE(aSrcFormat, aDstFormat, aPackFunc)                  \
  FORMAT_CASE_ROW(                                                            \
      aSrcFormat, aDstFormat,                                                 \
      aPackFunc<ShouldSwapRB(aSrcFormat, aDstFormat), RGBBitShift(aSrcFormat), \
                RGBByteIndex(aSrcFormat)>)

// Expands PACK_ROW_RGB_CASE for every supported 32-bit source format.
#define PACK_ROW_RGB(aDstFormat, aPackFunc)                      \
  PACK_ROW_RGB_CASE(SurfaceFormat::B8G8R8A8, aDstFormat, aPackFunc) \
  PACK_ROW_RGB_CASE(SurfaceFormat::B8G8R8X8, aDstFormat, aPackFunc) \
  PACK_ROW_RGB_CASE(SurfaceFormat::R8G8B8A8, aDstFormat, aPackFunc) \
  PACK_ROW_RGB_CASE(SurfaceFormat::R8G8B8X8, aDstFormat, aPackFunc) \
  PACK_ROW_RGB_CASE(SurfaceFormat::A8R8G8B8, aDstFormat, aPackFunc) \
  PACK_ROW_RGB_CASE(SurfaceFormat::X8R8G8B8, aDstFormat, aPackFunc)
1015 // Packing of 32-bit formats to A8.
1016 template <uint32_t aSrcAIndex>
1017 static void PackToA8(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
1018 int32_t aDstGap, IntSize aSize) {
1019 for (int32_t height = aSize.height; height > 0; height--) {
1020 const uint8_t* end = aSrc + 4 * aSize.width;
1021 do {
1022 *aDst++ = aSrc[aSrcAIndex];
1023 aSrc += 4;
1024 } while (aSrc < end);
1025 aSrc += aSrcGap;
1026 aDst += aDstGap;
// Dispatch case extracting the alpha channel of one 32-bit source format.
#define PACK_ALPHA_CASE(aSrcFormat, aDstFormat, aPackFunc) \
  FORMAT_CASE(aSrcFormat, aDstFormat, aPackFunc<AlphaByteIndex(aSrcFormat)>)

// Expands PACK_ALPHA_CASE for each 32-bit format that carries alpha.
#define PACK_ALPHA(aDstFormat, aPackFunc)                      \
  PACK_ALPHA_CASE(SurfaceFormat::B8G8R8A8, aDstFormat, aPackFunc) \
  PACK_ALPHA_CASE(SurfaceFormat::R8G8B8A8, aDstFormat, aPackFunc) \
  PACK_ALPHA_CASE(SurfaceFormat::A8R8G8B8, aDstFormat, aPackFunc)
1038 template <bool aSwapRB>
1039 void UnpackRowRGB24(const uint8_t* aSrc, uint8_t* aDst, int32_t aLength) {
1040 // Because we are expanding, we can only process the data back to front in
1041 // case we are performing this in place.
1042 const uint8_t* src = aSrc + 3 * (aLength - 1);
1043 uint32_t* dst = reinterpret_cast<uint32_t*>(aDst + 4 * aLength);
1044 while (src >= aSrc) {
1045 uint8_t r = src[aSwapRB ? 2 : 0];
1046 uint8_t g = src[1];
1047 uint8_t b = src[aSwapRB ? 0 : 2];
1048 #if MOZ_LITTLE_ENDIAN()
1049 *--dst = 0xFF000000 | (b << 16) | (g << 8) | r;
1050 #else
1051 *--dst = 0x000000FF | (b << 8) | (g << 16) | (r << 24);
1052 #endif
1053 src -= 3;
1057 // Force instantiation of swizzle variants here.
1058 template void UnpackRowRGB24<false>(const uint8_t*, uint8_t*, int32_t);
1059 template void UnpackRowRGB24<true>(const uint8_t*, uint8_t*, int32_t);
// Dispatch case returning the row unpacker from 24-bit RGB to a 32-bit
// format whose alpha sits in the high byte.
#define UNPACK_ROW_RGB(aDstFormat)                              \
  FORMAT_CASE_ROW(                                              \
      SurfaceFormat::R8G8B8, aDstFormat,                        \
      UnpackRowRGB24<ShouldSwapRB(SurfaceFormat::R8G8B8, aDstFormat)>)
1066 static void UnpackRowRGB24_To_ARGB(const uint8_t* aSrc, uint8_t* aDst,
1067 int32_t aLength) {
1068 // Because we are expanding, we can only process the data back to front in
1069 // case we are performing this in place.
1070 const uint8_t* src = aSrc + 3 * (aLength - 1);
1071 uint32_t* dst = reinterpret_cast<uint32_t*>(aDst + 4 * aLength);
1072 while (src >= aSrc) {
1073 uint8_t r = src[0];
1074 uint8_t g = src[1];
1075 uint8_t b = src[2];
1076 #if MOZ_LITTLE_ENDIAN()
1077 *--dst = 0x000000FF | (r << 8) | (g << 16) | (b << 24);
1078 #else
1079 *--dst = 0xFF000000 | (r << 24) | (g << 16) | b;
1080 #endif
1081 src -= 3;
// Dispatch case returning the row unpacker from 24-bit RGB to the
// alpha-first 32-bit formats.
#define UNPACK_ROW_RGB_TO_ARGB(aDstFormat) \
  FORMAT_CASE_ROW(SurfaceFormat::R8G8B8, aDstFormat, UnpackRowRGB24_To_ARGB)
/**
 * Converts an entire surface from aSrcFormat to aDstFormat. Dispatches on
 * the (src, dst) format pair: SIMD variants first (SSE2/NEON when the CPU
 * supports them), then the portable fallbacks, then a generic copy when the
 * formats already match.
 *
 * @param aSrc, aSrcStride, aSrcFormat  Source pixels, bytes per row, format.
 * @param aDst, aDstStride, aDstFormat  Destination pixels, stride, format.
 * @param aSize  Surface dimensions in pixels.
 * @return true if the surface was converted (or aSize was empty); false if
 *         the format pair is unsupported or a stride is too small for the
 *         width.
 */
bool SwizzleData(const uint8_t* aSrc, int32_t aSrcStride,
                 SurfaceFormat aSrcFormat, uint8_t* aDst, int32_t aDstStride,
                 SurfaceFormat aDstFormat, const IntSize& aSize) {
  if (aSize.IsEmpty()) {
    return true;
  }
  // Collapse rows into one long run when the strides allow it.
  IntSize size = CollapseSize(aSize, aSrcStride, aDstStride);
  // Find gap from end of row to the start of the next row.
  int32_t srcGap = GetStrideGap(aSize.width, aSrcFormat, aSrcStride);
  int32_t dstGap = GetStrideGap(aSize.width, aDstFormat, aDstStride);
  MOZ_ASSERT(srcGap >= 0 && dstGap >= 0);
  if (srcGap < 0 || dstGap < 0) {
    return false;
  }

// Each FORMAT_CASE below invokes the chosen swizzler with these arguments.
#define FORMAT_CASE_CALL(...) __VA_ARGS__(aSrc, srcGap, aDst, dstGap, size)

#ifdef USE_SSE2
  if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      SWIZZLE_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_SSE2(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_SSE2(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      SWIZZLE_SSE2(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_SSE2(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8)
      default:
        break;
    }
#endif

#ifdef USE_NEON
  if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      SWIZZLE_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_NEON(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_NEON(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      SWIZZLE_NEON(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_NEON(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8)
      default:
        break;
    }
#endif

  // Portable fallbacks: component swizzles, byte-order swaps, alpha forcing,
  // and packing to narrower formats.
  switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
    SWIZZLE_FALLBACK(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_FALLBACK(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_FALLBACK(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_FALLBACK(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8)

    SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::A8R8G8B8)
    SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::X8R8G8B8)

    SWIZZLE_FALLBACK(SurfaceFormat::A8R8G8B8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_FALLBACK(SurfaceFormat::X8R8G8B8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_FALLBACK(SurfaceFormat::A8R8G8B8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_FALLBACK(SurfaceFormat::X8R8G8B8, SurfaceFormat::R8G8B8A8)

    SWIZZLE_SWAP(SurfaceFormat::B8G8R8A8, SurfaceFormat::A8R8G8B8)
    SWIZZLE_SWAP(SurfaceFormat::B8G8R8A8, SurfaceFormat::X8R8G8B8)
    SWIZZLE_SWAP(SurfaceFormat::B8G8R8X8, SurfaceFormat::X8R8G8B8)
    SWIZZLE_SWAP(SurfaceFormat::B8G8R8X8, SurfaceFormat::A8R8G8B8)
    SWIZZLE_SWAP(SurfaceFormat::A8R8G8B8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_SWAP(SurfaceFormat::A8R8G8B8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8A8)

    SWIZZLE_SWAP_RGB24(SurfaceFormat::R8G8B8, SurfaceFormat::B8G8R8)
    SWIZZLE_SWAP_RGB24(SurfaceFormat::B8G8R8, SurfaceFormat::R8G8B8)

    SWIZZLE_OPAQUE(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_OPAQUE(SurfaceFormat::B8G8R8X8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_OPAQUE(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_OPAQUE(SurfaceFormat::R8G8B8X8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_OPAQUE(SurfaceFormat::A8R8G8B8, SurfaceFormat::X8R8G8B8)
    SWIZZLE_OPAQUE(SurfaceFormat::X8R8G8B8, SurfaceFormat::A8R8G8B8)

    PACK_RGB(SurfaceFormat::R5G6B5_UINT16, PackToRGB565)
    PACK_RGB(SurfaceFormat::B8G8R8, PackToRGB24)
    PACK_RGB(SurfaceFormat::R8G8B8, PackToRGB24)
    PACK_ALPHA(SurfaceFormat::A8, PackToA8)

    default:
      break;
  }

  if (aSrcFormat == aDstFormat) {
    // If the formats match, just do a generic copy.
    SwizzleCopy(aSrc, srcGap, aDst, dstGap, size, BytesPerPixel(aSrcFormat));
    return true;
  }

#undef FORMAT_CASE_CALL

  MOZ_ASSERT(false, "Unsupported swizzle formats");
  return false;
}
/**
 * Shared implementation for SwizzleYFlipData / PremultiplyYFlipData: applies
 * aSwizzleFn to every row while flipping the image vertically. Distinct
 * buffers are processed top-to-bottom into bottom-to-top rows; in-place
 * operation requires equal strides and exchanges mirrored row pairs through
 * a temporary row buffer. Returns false for a null row function, invalid
 * strides, in-place with mismatched strides, or allocation failure.
 */
static bool SwizzleYFlipDataInternal(const uint8_t* aSrc, int32_t aSrcStride,
                                     SurfaceFormat aSrcFormat, uint8_t* aDst,
                                     int32_t aDstStride,
                                     SurfaceFormat aDstFormat,
                                     const IntSize& aSize,
                                     SwizzleRowFn aSwizzleFn) {
  if (!aSwizzleFn) {
    return false;
  }

  // Guarantee our width and height are both greater than zero.
  if (aSize.IsEmpty()) {
    return true;
  }

  // Unlike SwizzleData/PremultiplyData, we don't use the stride gaps directly,
  // but we can use it to verify that the stride is valid for our width and
  // format.
  int32_t srcGap = GetStrideGap(aSize.width, aSrcFormat, aSrcStride);
  int32_t dstGap = GetStrideGap(aSize.width, aDstFormat, aDstStride);
  MOZ_ASSERT(srcGap >= 0 && dstGap >= 0);
  if (srcGap < 0 || dstGap < 0) {
    return false;
  }

  // Swapping/swizzling to a new buffer is trivial.
  if (aSrc != aDst) {
    const uint8_t* src = aSrc;
    const uint8_t* srcEnd = aSrc + aSize.height * aSrcStride;
    uint8_t* dst = aDst + (aSize.height - 1) * aDstStride;
    while (src < srcEnd) {
      aSwizzleFn(src, dst, aSize.width);
      src += aSrcStride;
      dst -= aDstStride;
    }
    return true;
  }

  // In-place operation needs the rows to line up exactly.
  if (aSrcStride != aDstStride) {
    return false;
  }

  // If we are swizzling in place, then we need a temporary row buffer.
  UniquePtr<uint8_t[]> rowBuffer(new (std::nothrow) uint8_t[aDstStride]);
  if (!rowBuffer) {
    return false;
  }

  // Swizzle and swap the top and bottom rows until we meet in the middle.
  int32_t middleRow = aSize.height / 2;
  uint8_t* top = aDst;
  uint8_t* bottom = aDst + (aSize.height - 1) * aDstStride;
  for (int32_t row = 0; row < middleRow; ++row) {
    memcpy(rowBuffer.get(), bottom, aDstStride);
    aSwizzleFn(top, bottom, aSize.width);
    aSwizzleFn(rowBuffer.get(), top, aSize.width);
    top += aDstStride;
    bottom -= aDstStride;
  }

  // If there is an odd numbered row, we haven't swizzled it yet.
  if (aSize.height % 2 == 1) {
    top = aDst + middleRow * aDstStride;
    aSwizzleFn(top, top, aSize.width);
  }

  return true;
}
1261 bool SwizzleYFlipData(const uint8_t* aSrc, int32_t aSrcStride,
1262 SurfaceFormat aSrcFormat, uint8_t* aDst,
1263 int32_t aDstStride, SurfaceFormat aDstFormat,
1264 const IntSize& aSize) {
1265 return SwizzleYFlipDataInternal(aSrc, aSrcStride, aSrcFormat, aDst,
1266 aDstStride, aDstFormat, aSize,
1267 SwizzleRow(aSrcFormat, aDstFormat));
1270 bool PremultiplyYFlipData(const uint8_t* aSrc, int32_t aSrcStride,
1271 SurfaceFormat aSrcFormat, uint8_t* aDst,
1272 int32_t aDstStride, SurfaceFormat aDstFormat,
1273 const IntSize& aSize) {
1274 return SwizzleYFlipDataInternal(aSrc, aSrcStride, aSrcFormat, aDst,
1275 aDstStride, aDstFormat, aSize,
1276 PremultiplyRow(aSrcFormat, aDstFormat));
/**
 * Returns a function that swizzles a single row of pixels from aSrcFormat to
 * aDstFormat, or nullptr if the format pair is unsupported. Prefers AVX2,
 * SSSE3, SSE2 and NEON accelerated variants when the CPU supports them,
 * falls back to portable row functions, and to a plain per-pixel copy when
 * the formats already match.
 */
SwizzleRowFn SwizzleRow(SurfaceFormat aSrcFormat, SurfaceFormat aDstFormat) {
#ifdef USE_SSE2
  if (mozilla::supports_avx2()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      UNPACK_ROW_RGB_AVX2(SurfaceFormat::R8G8B8X8)
      UNPACK_ROW_RGB_AVX2(SurfaceFormat::R8G8B8A8)
      UNPACK_ROW_RGB_AVX2(SurfaceFormat::B8G8R8X8)
      UNPACK_ROW_RGB_AVX2(SurfaceFormat::B8G8R8A8)
      default:
        break;
    }

  if (mozilla::supports_ssse3()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      UNPACK_ROW_RGB_SSSE3(SurfaceFormat::R8G8B8X8)
      UNPACK_ROW_RGB_SSSE3(SurfaceFormat::R8G8B8A8)
      UNPACK_ROW_RGB_SSSE3(SurfaceFormat::B8G8R8X8)
      UNPACK_ROW_RGB_SSSE3(SurfaceFormat::B8G8R8A8)
      default:
        break;
    }

  if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      SWIZZLE_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_ROW_SSE2(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_ROW_SSE2(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      SWIZZLE_ROW_SSE2(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_ROW_SSE2(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8)
      default:
        break;
    }
#endif

#ifdef USE_NEON
  if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      UNPACK_ROW_RGB_NEON(SurfaceFormat::R8G8B8X8)
      UNPACK_ROW_RGB_NEON(SurfaceFormat::R8G8B8A8)
      UNPACK_ROW_RGB_NEON(SurfaceFormat::B8G8R8X8)
      UNPACK_ROW_RGB_NEON(SurfaceFormat::B8G8R8A8)
      SWIZZLE_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_ROW_NEON(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_ROW_NEON(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      SWIZZLE_ROW_NEON(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_ROW_NEON(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8)
      default:
        break;
    }
#endif

  // Portable fallbacks.
  switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8)

    SWIZZLE_ROW_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::A8R8G8B8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::X8R8G8B8)

    SWIZZLE_ROW_FALLBACK(SurfaceFormat::A8R8G8B8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::X8R8G8B8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::A8R8G8B8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::X8R8G8B8, SurfaceFormat::R8G8B8A8)

    SWIZZLE_ROW_OPAQUE(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_ROW_OPAQUE(SurfaceFormat::B8G8R8X8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_ROW_OPAQUE(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_ROW_OPAQUE(SurfaceFormat::R8G8B8X8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_ROW_OPAQUE(SurfaceFormat::A8R8G8B8, SurfaceFormat::X8R8G8B8)
    SWIZZLE_ROW_OPAQUE(SurfaceFormat::X8R8G8B8, SurfaceFormat::A8R8G8B8)

    SWIZZLE_ROW_SWAP(SurfaceFormat::B8G8R8A8, SurfaceFormat::A8R8G8B8)
    SWIZZLE_ROW_SWAP(SurfaceFormat::B8G8R8A8, SurfaceFormat::X8R8G8B8)
    SWIZZLE_ROW_SWAP(SurfaceFormat::B8G8R8X8, SurfaceFormat::X8R8G8B8)
    SWIZZLE_ROW_SWAP(SurfaceFormat::B8G8R8X8, SurfaceFormat::A8R8G8B8)
    SWIZZLE_ROW_SWAP(SurfaceFormat::A8R8G8B8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_ROW_SWAP(SurfaceFormat::A8R8G8B8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_ROW_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_ROW_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8A8)

    SWIZZLE_ROW_SWAP_RGB24(SurfaceFormat::R8G8B8, SurfaceFormat::B8G8R8)
    SWIZZLE_ROW_SWAP_RGB24(SurfaceFormat::B8G8R8, SurfaceFormat::R8G8B8)

    UNPACK_ROW_RGB(SurfaceFormat::R8G8B8X8)
    UNPACK_ROW_RGB(SurfaceFormat::R8G8B8A8)
    UNPACK_ROW_RGB(SurfaceFormat::B8G8R8X8)
    UNPACK_ROW_RGB(SurfaceFormat::B8G8R8A8)
    UNPACK_ROW_RGB_TO_ARGB(SurfaceFormat::A8R8G8B8)
    UNPACK_ROW_RGB_TO_ARGB(SurfaceFormat::X8R8G8B8)

    PACK_ROW_RGB(SurfaceFormat::R8G8B8, PackRowToRGB24)
    PACK_ROW_RGB(SurfaceFormat::B8G8R8, PackRowToRGB24)

    default:
      break;
  }

  // Matching formats only need a straight per-pixel copy.
  if (aSrcFormat == aDstFormat) {
    switch (BytesPerPixel(aSrcFormat)) {
      case 4:
        return &SwizzleRowCopy<4>;
      case 3:
        return &SwizzleRowCopy<3>;
      default:
        break;
    }
  }

  MOZ_ASSERT_UNREACHABLE("Unsupported swizzle formats");
  return nullptr;
}
1398 static IntRect ReorientRowRotate0FlipFallback(const uint8_t* aSrc,
1399 int32_t aSrcRow, uint8_t* aDst,
1400 const IntSize& aDstSize,
1401 int32_t aDstStride) {
1402 // Reverse order of pixels in the row.
1403 const uint32_t* src = reinterpret_cast<const uint32_t*>(aSrc);
1404 const uint32_t* end = src + aDstSize.width;
1405 uint32_t* dst = reinterpret_cast<uint32_t*>(aDst + aSrcRow * aDstStride) +
1406 aDstSize.width - 1;
1407 do {
1408 *dst-- = *src++;
1409 } while (src < end);
1411 return IntRect(0, aSrcRow, aDstSize.width, 1);
1414 static IntRect ReorientRowRotate90FlipFallback(const uint8_t* aSrc,
1415 int32_t aSrcRow, uint8_t* aDst,
1416 const IntSize& aDstSize,
1417 int32_t aDstStride) {
1418 // Copy row of pixels from top to bottom, into left to right columns.
1419 const uint32_t* src = reinterpret_cast<const uint32_t*>(aSrc);
1420 const uint32_t* end = src + aDstSize.height;
1421 uint32_t* dst = reinterpret_cast<uint32_t*>(aDst) + aSrcRow;
1422 int32_t stride = aDstStride / sizeof(uint32_t);
1423 do {
1424 *dst = *src++;
1425 dst += stride;
1426 } while (src < end);
1428 return IntRect(aSrcRow, 0, 1, aDstSize.height);
1431 static IntRect ReorientRowRotate180FlipFallback(const uint8_t* aSrc,
1432 int32_t aSrcRow, uint8_t* aDst,
1433 const IntSize& aDstSize,
1434 int32_t aDstStride) {
1435 // Copy row of pixels from top to bottom, into bottom to top rows.
1436 uint8_t* dst = aDst + (aDstSize.height - aSrcRow - 1) * aDstStride;
1437 memcpy(dst, aSrc, aDstSize.width * sizeof(uint32_t));
1438 return IntRect(0, aDstSize.height - aSrcRow - 1, aDstSize.width, 1);
1441 static IntRect ReorientRowRotate270FlipFallback(const uint8_t* aSrc,
1442 int32_t aSrcRow, uint8_t* aDst,
1443 const IntSize& aDstSize,
1444 int32_t aDstStride) {
1445 // Copy row of pixels in reverse order from top to bottom, into right to left
1446 // columns.
1447 const uint32_t* src = reinterpret_cast<const uint32_t*>(aSrc);
1448 const uint32_t* end = src + aDstSize.height;
1449 uint32_t* dst =
1450 reinterpret_cast<uint32_t*>(aDst + (aDstSize.height - 1) * aDstStride) +
1451 aDstSize.width - aSrcRow - 1;
1452 int32_t stride = aDstStride / sizeof(uint32_t);
1453 do {
1454 *dst = *src++;
1455 dst -= stride;
1456 } while (src < end);
1458 return IntRect(aDstSize.width - aSrcRow - 1, 0, 1, aDstSize.height);
1461 static IntRect ReorientRowRotate0Fallback(const uint8_t* aSrc, int32_t aSrcRow,
1462 uint8_t* aDst,
1463 const IntSize& aDstSize,
1464 int32_t aDstStride) {
1465 // Copy row of pixels into the destination.
1466 uint8_t* dst = aDst + aSrcRow * aDstStride;
1467 memcpy(dst, aSrc, aDstSize.width * sizeof(uint32_t));
1468 return IntRect(0, aSrcRow, aDstSize.width, 1);
1471 static IntRect ReorientRowRotate90Fallback(const uint8_t* aSrc, int32_t aSrcRow,
1472 uint8_t* aDst,
1473 const IntSize& aDstSize,
1474 int32_t aDstStride) {
1475 // Copy row of pixels from top to bottom, into right to left columns.
1476 const uint32_t* src = reinterpret_cast<const uint32_t*>(aSrc);
1477 const uint32_t* end = src + aDstSize.height;
1478 uint32_t* dst =
1479 reinterpret_cast<uint32_t*>(aDst) + aDstSize.width - aSrcRow - 1;
1480 int32_t stride = aDstStride / sizeof(uint32_t);
1481 do {
1482 *dst = *src++;
1483 dst += stride;
1484 } while (src < end);
1486 return IntRect(aDstSize.width - aSrcRow - 1, 0, 1, aDstSize.height);
1489 static IntRect ReorientRowRotate180Fallback(const uint8_t* aSrc,
1490 int32_t aSrcRow, uint8_t* aDst,
1491 const IntSize& aDstSize,
1492 int32_t aDstStride) {
1493 // Copy row of pixels in reverse order from top to bottom, into bottom to top
1494 // rows.
1495 const uint32_t* src = reinterpret_cast<const uint32_t*>(aSrc);
1496 const uint32_t* end = src + aDstSize.width;
1497 uint32_t* dst = reinterpret_cast<uint32_t*>(
1498 aDst + (aDstSize.height - aSrcRow - 1) * aDstStride) +
1499 aDstSize.width - 1;
1500 do {
1501 *dst-- = *src++;
1502 } while (src < end);
1504 return IntRect(0, aDstSize.height - aSrcRow - 1, aDstSize.width, 1);
1507 static IntRect ReorientRowRotate270Fallback(const uint8_t* aSrc,
1508 int32_t aSrcRow, uint8_t* aDst,
1509 const IntSize& aDstSize,
1510 int32_t aDstStride) {
1511 // Copy row of pixels in reverse order from top to bottom, into left to right
1512 // column.
1513 const uint32_t* src = reinterpret_cast<const uint32_t*>(aSrc);
1514 const uint32_t* end = src + aDstSize.height;
1515 uint32_t* dst =
1516 reinterpret_cast<uint32_t*>(aDst + (aDstSize.height - 1) * aDstStride) +
1517 aSrcRow;
1518 int32_t stride = aDstStride / sizeof(uint32_t);
1519 do {
1520 *dst = *src++;
1521 dst -= stride;
1522 } while (src < end);
1524 return IntRect(aSrcRow, 0, 1, aDstSize.height);
/**
 * Maps an image orientation (flip + rotation) to the per-row reorientation
 * function implementing it, or nullptr for unhandled combinations. For
 * flipped 90/270 rotations, flipFirst selects the opposite rotation variant
 * (flip-then-rotate vs. rotate-then-flip yield mirrored results).
 */
ReorientRowFn ReorientRow(const struct image::Orientation& aOrientation) {
  switch (aOrientation.flip) {
    case image::Flip::Unflipped:
      switch (aOrientation.rotation) {
        case image::Angle::D0:
          return &ReorientRowRotate0Fallback;
        case image::Angle::D90:
          return &ReorientRowRotate90Fallback;
        case image::Angle::D180:
          return &ReorientRowRotate180Fallback;
        case image::Angle::D270:
          return &ReorientRowRotate270Fallback;
        default:
          break;
      }
      break;
    case image::Flip::Horizontal:
      switch (aOrientation.rotation) {
        case image::Angle::D0:
          return &ReorientRowRotate0FlipFallback;
        case image::Angle::D90:
          // flipFirst swaps the 90 and 270 degree variants.
          if (aOrientation.flipFirst) {
            return &ReorientRowRotate270FlipFallback;
          } else {
            return &ReorientRowRotate90FlipFallback;
          }
        case image::Angle::D180:
          return &ReorientRowRotate180FlipFallback;
        case image::Angle::D270:
          // flipFirst swaps the 90 and 270 degree variants.
          if (aOrientation.flipFirst) {
            return &ReorientRowRotate90FlipFallback;
          } else {
            return &ReorientRowRotate270FlipFallback;
          }
        default:
          break;
      }
      break;
    default:
      break;
  }

  MOZ_ASSERT_UNREACHABLE("Unhandled orientation!");
  return nullptr;
}
1573 } // namespace gfx
1574 } // namespace mozilla