1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
10 #include "mozilla/CheckedInt.h"
11 #include "mozilla/EndianUtils.h"
14 # include "mozilla/SSE.h"
18 # include "mozilla/arm.h"
25 * Convenience macros for dispatching to various format combinations.
28 // Hash the formats to a relatively dense value to optimize jump table
29 // generation. The first 6 formats in SurfaceFormat are the 32-bit BGRA variants
30 // and are the most common formats dispatched here. Room is reserved in the
31 // lowish bits for up to these 6 destination formats. If a destination format is
32 // >= 6, the 6th bit is set to avoid collisions.
33 #define FORMAT_KEY(aSrcFormat, aDstFormat) \
34 (int(aSrcFormat) * 6 + int(aDstFormat) + (int(int(aDstFormat) >= 6) << 6))
36 #define FORMAT_CASE_EXPR(aSrcFormat, aDstFormat, ...) \
37 case FORMAT_KEY(aSrcFormat, aDstFormat): \
41 #define FORMAT_CASE(aSrcFormat, aDstFormat, ...) \
42 FORMAT_CASE_EXPR(aSrcFormat, aDstFormat, FORMAT_CASE_CALL(__VA_ARGS__))
44 #define FORMAT_CASE_ROW(aSrcFormat, aDstFormat, ...) \
45 case FORMAT_KEY(aSrcFormat, aDstFormat): \
49 * Constexpr functions for analyzing format attributes in templates.
52 // Whether B comes before R in pixel memory layout.
53 static constexpr bool IsBGRFormat(SurfaceFormat aFormat
) {
54 return aFormat
== SurfaceFormat::B8G8R8A8
||
55 #if MOZ_LITTLE_ENDIAN()
56 aFormat
== SurfaceFormat::R5G6B5_UINT16
||
58 aFormat
== SurfaceFormat::B8G8R8X8
|| aFormat
== SurfaceFormat::B8G8R8
;
61 // Whether the order of B and R need to be swapped to map from src to dst.
62 static constexpr bool ShouldSwapRB(SurfaceFormat aSrcFormat
,
63 SurfaceFormat aDstFormat
) {
64 return IsBGRFormat(aSrcFormat
) != IsBGRFormat(aDstFormat
);
67 // The starting byte of the RGB components in pixel memory.
68 static constexpr uint32_t RGBByteIndex(SurfaceFormat aFormat
) {
69 return aFormat
== SurfaceFormat::A8R8G8B8
||
70 aFormat
== SurfaceFormat::X8R8G8B8
75 // The byte of the alpha component, which just comes after RGB.
76 static constexpr uint32_t AlphaByteIndex(SurfaceFormat aFormat
) {
77 return (RGBByteIndex(aFormat
) + 3) % 4;
80 // The endian-dependent bit shift to access RGB of a UINT32 pixel.
81 static constexpr uint32_t RGBBitShift(SurfaceFormat aFormat
) {
82 #if MOZ_LITTLE_ENDIAN()
83 return 8 * RGBByteIndex(aFormat
);
85 return 8 - 8 * RGBByteIndex(aFormat
);
89 // The endian-dependent bit shift to access alpha of a UINT32 pixel.
90 static constexpr uint32_t AlphaBitShift(SurfaceFormat aFormat
) {
91 return (RGBBitShift(aFormat
) + 24) % 32;
94 // Whether the pixel format should ignore the value of the alpha channel and
95 // treat it as opaque.
96 static constexpr bool IgnoreAlpha(SurfaceFormat aFormat
) {
97 return aFormat
== SurfaceFormat::B8G8R8X8
||
98 aFormat
== SurfaceFormat::R8G8B8X8
||
99 aFormat
== SurfaceFormat::X8R8G8B8
;
102 // Whether to force alpha to opaque to map from src to dst.
103 static constexpr bool ShouldForceOpaque(SurfaceFormat aSrcFormat
,
104 SurfaceFormat aDstFormat
) {
105 return IgnoreAlpha(aSrcFormat
) != IgnoreAlpha(aDstFormat
);
113 template <bool aSwapRB
, bool aOpaqueAlpha
>
114 void Premultiply_SSE2(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize
);
116 # define PREMULTIPLY_SSE2(aSrcFormat, aDstFormat) \
117 FORMAT_CASE(aSrcFormat, aDstFormat, \
118 Premultiply_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat), \
119 ShouldForceOpaque(aSrcFormat, aDstFormat)>)
121 template <bool aSwapRB
, bool aOpaqueAlpha
>
122 void PremultiplyRow_SSE2(const uint8_t*, uint8_t*, int32_t);
124 # define PREMULTIPLY_ROW_SSE2(aSrcFormat, aDstFormat) \
126 aSrcFormat, aDstFormat, \
127 PremultiplyRow_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat), \
128 ShouldForceOpaque(aSrcFormat, aDstFormat)>)
130 template <bool aSwapRB
>
131 void Unpremultiply_SSE2(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize
);
133 # define UNPREMULTIPLY_SSE2(aSrcFormat, aDstFormat) \
134 FORMAT_CASE(aSrcFormat, aDstFormat, \
135 Unpremultiply_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat)>)
137 template <bool aSwapRB
>
138 void UnpremultiplyRow_SSE2(const uint8_t*, uint8_t*, int32_t);
140 # define UNPREMULTIPLY_ROW_SSE2(aSrcFormat, aDstFormat) \
142 aSrcFormat, aDstFormat, \
143 UnpremultiplyRow_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat)>)
145 template <bool aSwapRB
, bool aOpaqueAlpha
>
146 void Swizzle_SSE2(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize
);
148 # define SWIZZLE_SSE2(aSrcFormat, aDstFormat) \
149 FORMAT_CASE(aSrcFormat, aDstFormat, \
150 Swizzle_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat), \
151 ShouldForceOpaque(aSrcFormat, aDstFormat)>)
153 template <bool aSwapRB
, bool aOpaqueAlpha
>
154 void SwizzleRow_SSE2(const uint8_t*, uint8_t*, int32_t);
156 # define SWIZZLE_ROW_SSE2(aSrcFormat, aDstFormat) \
158 aSrcFormat, aDstFormat, \
159 SwizzleRow_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat), \
160 ShouldForceOpaque(aSrcFormat, aDstFormat)>)
162 template <bool aSwapRB
>
163 void UnpackRowRGB24_SSSE3(const uint8_t*, uint8_t*, int32_t);
165 # define UNPACK_ROW_RGB_SSSE3(aDstFormat) \
167 SurfaceFormat::R8G8B8, aDstFormat, \
168 UnpackRowRGB24_SSSE3<ShouldSwapRB(SurfaceFormat::R8G8B8, aDstFormat)>)
170 template <bool aSwapRB
>
171 void UnpackRowRGB24_AVX2(const uint8_t*, uint8_t*, int32_t);
173 # define UNPACK_ROW_RGB_AVX2(aDstFormat) \
175 SurfaceFormat::R8G8B8, aDstFormat, \
176 UnpackRowRGB24_AVX2<ShouldSwapRB(SurfaceFormat::R8G8B8, aDstFormat)>)
182 * ARM NEON optimizations
185 template <bool aSwapRB
, bool aOpaqueAlpha
>
186 void Premultiply_NEON(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize
);
188 # define PREMULTIPLY_NEON(aSrcFormat, aDstFormat) \
189 FORMAT_CASE(aSrcFormat, aDstFormat, \
190 Premultiply_NEON<ShouldSwapRB(aSrcFormat, aDstFormat), \
191 ShouldForceOpaque(aSrcFormat, aDstFormat)>)
193 template <bool aSwapRB
, bool aOpaqueAlpha
>
194 void PremultiplyRow_NEON(const uint8_t*, uint8_t*, int32_t);
196 # define PREMULTIPLY_ROW_NEON(aSrcFormat, aDstFormat) \
198 aSrcFormat, aDstFormat, \
199 PremultiplyRow_NEON<ShouldSwapRB(aSrcFormat, aDstFormat), \
200 ShouldForceOpaque(aSrcFormat, aDstFormat)>)
202 template <bool aSwapRB
>
203 void Unpremultiply_NEON(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize
);
205 # define UNPREMULTIPLY_NEON(aSrcFormat, aDstFormat) \
206 FORMAT_CASE(aSrcFormat, aDstFormat, \
207 Unpremultiply_NEON<ShouldSwapRB(aSrcFormat, aDstFormat)>)
209 template <bool aSwapRB
>
210 void UnpremultiplyRow_NEON(const uint8_t*, uint8_t*, int32_t);
212 # define UNPREMULTIPLY_ROW_NEON(aSrcFormat, aDstFormat) \
214 aSrcFormat, aDstFormat, \
215 UnpremultiplyRow_NEON<ShouldSwapRB(aSrcFormat, aDstFormat)>)
217 template <bool aSwapRB
, bool aOpaqueAlpha
>
218 void Swizzle_NEON(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize
);
220 # define SWIZZLE_NEON(aSrcFormat, aDstFormat) \
221 FORMAT_CASE(aSrcFormat, aDstFormat, \
222 Swizzle_NEON<ShouldSwapRB(aSrcFormat, aDstFormat), \
223 ShouldForceOpaque(aSrcFormat, aDstFormat)>)
225 template <bool aSwapRB
, bool aOpaqueAlpha
>
226 void SwizzleRow_NEON(const uint8_t*, uint8_t*, int32_t);
228 # define SWIZZLE_ROW_NEON(aSrcFormat, aDstFormat) \
230 aSrcFormat, aDstFormat, \
231 SwizzleRow_NEON<ShouldSwapRB(aSrcFormat, aDstFormat), \
232 ShouldForceOpaque(aSrcFormat, aDstFormat)>)
234 template <bool aSwapRB
>
235 void UnpackRowRGB24_NEON(const uint8_t*, uint8_t*, int32_t);
237 # define UNPACK_ROW_RGB_NEON(aDstFormat) \
239 SurfaceFormat::R8G8B8, aDstFormat, \
240 UnpackRowRGB24_NEON<ShouldSwapRB(SurfaceFormat::R8G8B8, aDstFormat)>)
// Fallback premultiply implementation that uses splayed pixel math to reduce
// the multiplications used. That is, the R and B components are isolated from
// the G and A components, which then can be multiplied as if they were two
// 2-component vectors. Otherwise, an approximation if divide-by-255 is used
// which is faster than an actual division. These optimizations are also used
// for the SSE2 and NEON implementations.
template <bool aSwapRB, bool aOpaqueAlpha, uint32_t aSrcRGBShift,
          uint32_t aSrcAShift, uint32_t aDstRGBShift, uint32_t aDstAShift>
static void PremultiplyChunkFallback(const uint8_t*& aSrc, uint8_t*& aDst,
                                     int32_t aLength) {
  const uint8_t* end = aSrc + 4 * aLength;
  do {
    // Load and process 1 entire pixel at a time.
    uint32_t color = *reinterpret_cast<const uint32_t*>(aSrc);

    uint32_t a = aSrcAShift ? color >> aSrcAShift : color & 0xFF;

    // Isolate the R and B components.
    uint32_t rb = (color >> aSrcRGBShift) & 0x00FF00FF;
    // Swap the order of R and B if necessary.
    if (aSwapRB) {
      rb = (rb >> 16) | (rb << 16);
    }
    // Approximate the multiply by alpha and divide by 255 which is
    // c = c*a + 255; c = (c + (c >> 8)) >> 8;
    // However, we omit the final >> 8 to fold it with the final shift into
    // place depending on desired output format.
    rb = rb * a + 0x00FF00FF;
    rb = (rb + ((rb >> 8) & 0x00FF00FF)) & 0xFF00FF00;

    // Use same approximation as above, but G is shifted 8 bits left.
    // Alpha is left out and handled separately.
    uint32_t g = color & (0xFF00 << aSrcRGBShift);
    g = g * a + (0xFF00 << aSrcRGBShift);
    g = (g + (g >> 8)) & (0xFF0000 << aSrcRGBShift);

    // The above math leaves RGB shifted left by 8 bits.
    // Shift them right if required for the output format.
    // then combine them back together to produce output pixel.
    // Add the alpha back on if the output format is not opaque.
    *reinterpret_cast<uint32_t*>(aDst) =
        (rb >> (8 - aDstRGBShift)) | (g >> (8 + aSrcRGBShift - aDstRGBShift)) |
        (aOpaqueAlpha ? 0xFF << aDstAShift : a << aDstAShift);

    aSrc += 4;
    aDst += 4;
  } while (aSrc < end);
}
297 template <bool aSwapRB
, bool aOpaqueAlpha
, uint32_t aSrcRGBShift
,
298 uint32_t aSrcAShift
, uint32_t aDstRGBShift
, uint32_t aDstAShift
>
299 static void PremultiplyRowFallback(const uint8_t* aSrc
, uint8_t* aDst
,
301 PremultiplyChunkFallback
<aSwapRB
, aOpaqueAlpha
, aSrcRGBShift
, aSrcAShift
,
302 aDstRGBShift
, aDstAShift
>(aSrc
, aDst
, aLength
);
305 template <bool aSwapRB
, bool aOpaqueAlpha
, uint32_t aSrcRGBShift
,
306 uint32_t aSrcAShift
, uint32_t aDstRGBShift
, uint32_t aDstAShift
>
307 static void PremultiplyFallback(const uint8_t* aSrc
, int32_t aSrcGap
,
308 uint8_t* aDst
, int32_t aDstGap
, IntSize aSize
) {
309 for (int32_t height
= aSize
.height
; height
> 0; height
--) {
310 PremultiplyChunkFallback
<aSwapRB
, aOpaqueAlpha
, aSrcRGBShift
, aSrcAShift
,
311 aDstRGBShift
, aDstAShift
>(aSrc
, aDst
, aSize
.width
);
317 #define PREMULTIPLY_FALLBACK_CASE(aSrcFormat, aDstFormat) \
319 aSrcFormat, aDstFormat, \
320 PremultiplyFallback<ShouldSwapRB(aSrcFormat, aDstFormat), \
321 ShouldForceOpaque(aSrcFormat, aDstFormat), \
322 RGBBitShift(aSrcFormat), AlphaBitShift(aSrcFormat), \
323 RGBBitShift(aDstFormat), AlphaBitShift(aDstFormat)>)
325 #define PREMULTIPLY_FALLBACK(aSrcFormat) \
326 PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8A8) \
327 PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8X8) \
328 PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8A8) \
329 PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8X8) \
330 PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::A8R8G8B8) \
331 PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::X8R8G8B8)
333 #define PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, aDstFormat) \
334 FORMAT_CASE_ROW(aSrcFormat, aDstFormat, \
335 PremultiplyRowFallback< \
336 ShouldSwapRB(aSrcFormat, aDstFormat), \
337 ShouldForceOpaque(aSrcFormat, aDstFormat), \
338 RGBBitShift(aSrcFormat), AlphaBitShift(aSrcFormat), \
339 RGBBitShift(aDstFormat), AlphaBitShift(aDstFormat)>)
341 #define PREMULTIPLY_ROW_FALLBACK(aSrcFormat) \
342 PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8A8) \
343 PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8X8) \
344 PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8A8) \
345 PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8X8) \
346 PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::A8R8G8B8) \
347 PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::X8R8G8B8)
349 // If rows are tightly packed, and the size of the total area will fit within
350 // the precision range of a single row, then process all the data as if it was
352 static inline IntSize
CollapseSize(const IntSize
& aSize
, int32_t aSrcStride
,
353 int32_t aDstStride
) {
354 if (aSrcStride
== aDstStride
&& (aSrcStride
& 3) == 0 &&
355 aSrcStride
/ 4 == aSize
.width
) {
356 CheckedInt32 area
= CheckedInt32(aSize
.width
) * CheckedInt32(aSize
.height
);
357 if (area
.isValid()) {
358 return IntSize(area
.value(), 1);
364 static inline int32_t GetStrideGap(int32_t aWidth
, SurfaceFormat aFormat
,
366 CheckedInt32 used
= CheckedInt32(aWidth
) * BytesPerPixel(aFormat
);
367 if (!used
.isValid() || used
.value() < 0) {
370 return aStride
- used
.value();
373 bool PremultiplyData(const uint8_t* aSrc
, int32_t aSrcStride
,
374 SurfaceFormat aSrcFormat
, uint8_t* aDst
,
375 int32_t aDstStride
, SurfaceFormat aDstFormat
,
376 const IntSize
& aSize
) {
377 if (aSize
.IsEmpty()) {
380 IntSize size
= CollapseSize(aSize
, aSrcStride
, aDstStride
);
381 // Find gap from end of row to the start of the next row.
382 int32_t srcGap
= GetStrideGap(aSize
.width
, aSrcFormat
, aSrcStride
);
383 int32_t dstGap
= GetStrideGap(aSize
.width
, aDstFormat
, aDstStride
);
384 MOZ_ASSERT(srcGap
>= 0 && dstGap
>= 0);
385 if (srcGap
< 0 || dstGap
< 0) {
389 #define FORMAT_CASE_CALL(...) __VA_ARGS__(aSrc, srcGap, aDst, dstGap, size)
392 if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat
, aDstFormat
)) {
393 PREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8
, SurfaceFormat::B8G8R8A8
)
394 PREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8
, SurfaceFormat::B8G8R8X8
)
395 PREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8
, SurfaceFormat::R8G8B8A8
)
396 PREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8
, SurfaceFormat::R8G8B8X8
)
397 PREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8
, SurfaceFormat::R8G8B8A8
)
398 PREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8
, SurfaceFormat::R8G8B8X8
)
399 PREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8
, SurfaceFormat::B8G8R8A8
)
400 PREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8
, SurfaceFormat::B8G8R8X8
)
407 if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat
, aDstFormat
)) {
408 PREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8
, SurfaceFormat::B8G8R8A8
)
409 PREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8
, SurfaceFormat::B8G8R8X8
)
410 PREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8
, SurfaceFormat::R8G8B8A8
)
411 PREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8
, SurfaceFormat::R8G8B8X8
)
412 PREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8
, SurfaceFormat::R8G8B8A8
)
413 PREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8
, SurfaceFormat::R8G8B8X8
)
414 PREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8
, SurfaceFormat::B8G8R8A8
)
415 PREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8
, SurfaceFormat::B8G8R8X8
)
421 switch (FORMAT_KEY(aSrcFormat
, aDstFormat
)) {
422 PREMULTIPLY_FALLBACK(SurfaceFormat::B8G8R8A8
)
423 PREMULTIPLY_FALLBACK(SurfaceFormat::R8G8B8A8
)
424 PREMULTIPLY_FALLBACK(SurfaceFormat::A8R8G8B8
)
429 #undef FORMAT_CASE_CALL
431 MOZ_ASSERT(false, "Unsupported premultiply formats");
435 SwizzleRowFn
PremultiplyRow(SurfaceFormat aSrcFormat
,
436 SurfaceFormat aDstFormat
) {
438 if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat
, aDstFormat
)) {
439 PREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8
, SurfaceFormat::B8G8R8A8
)
440 PREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8
, SurfaceFormat::B8G8R8X8
)
441 PREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8
, SurfaceFormat::R8G8B8A8
)
442 PREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8
, SurfaceFormat::R8G8B8X8
)
443 PREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8
, SurfaceFormat::R8G8B8A8
)
444 PREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8
, SurfaceFormat::R8G8B8X8
)
445 PREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8
, SurfaceFormat::B8G8R8A8
)
446 PREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8
, SurfaceFormat::B8G8R8X8
)
453 if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat
, aDstFormat
)) {
454 PREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8
, SurfaceFormat::B8G8R8A8
)
455 PREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8
, SurfaceFormat::B8G8R8X8
)
456 PREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8
, SurfaceFormat::R8G8B8A8
)
457 PREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8
, SurfaceFormat::R8G8B8X8
)
458 PREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8
, SurfaceFormat::R8G8B8A8
)
459 PREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8
, SurfaceFormat::R8G8B8X8
)
460 PREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8
, SurfaceFormat::B8G8R8A8
)
461 PREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8
, SurfaceFormat::B8G8R8X8
)
467 switch (FORMAT_KEY(aSrcFormat
, aDstFormat
)) {
468 PREMULTIPLY_ROW_FALLBACK(SurfaceFormat::B8G8R8A8
)
469 PREMULTIPLY_ROW_FALLBACK(SurfaceFormat::R8G8B8A8
)
470 PREMULTIPLY_ROW_FALLBACK(SurfaceFormat::A8R8G8B8
)
475 MOZ_ASSERT_UNREACHABLE("Unsupported premultiply formats");
// Generate a table of 8.16 fixed-point reciprocals representing 1/alpha.
// Entry 0 is 0 so that fully transparent pixels unpremultiply to 0.
#define UNPREMULQ(x) (0xFF00FFU / (x))
#define UNPREMULQ_2(x) UNPREMULQ(x), UNPREMULQ((x) + 1)
#define UNPREMULQ_4(x) UNPREMULQ_2(x), UNPREMULQ_2((x) + 2)
#define UNPREMULQ_8(x) UNPREMULQ_4(x), UNPREMULQ_4((x) + 4)
#define UNPREMULQ_16(x) UNPREMULQ_8(x), UNPREMULQ_8((x) + 8)
#define UNPREMULQ_32(x) UNPREMULQ_16(x), UNPREMULQ_16((x) + 16)
static const uint32_t sUnpremultiplyTable[256] = {0,
                                                  UNPREMULQ(1),
                                                  UNPREMULQ_2(2),
                                                  UNPREMULQ_4(4),
                                                  UNPREMULQ_8(8),
                                                  UNPREMULQ_16(16),
                                                  UNPREMULQ_32(32),
                                                  UNPREMULQ_32(64),
                                                  UNPREMULQ_32(96),
                                                  UNPREMULQ_32(128),
                                                  UNPREMULQ_32(160),
                                                  UNPREMULQ_32(192),
                                                  UNPREMULQ_32(224)};
504 // Fallback unpremultiply implementation that uses 8.16 fixed-point reciprocal
505 // math to eliminate any division by the alpha component. This optimization is
506 // used for the SSE2 and NEON implementations, with some adaptations. This
507 // implementation also accesses color components using individual byte accesses
508 // as this profiles faster than accessing the pixel as a uint32_t and
509 // shifting/masking to access components.
510 template <bool aSwapRB
, uint32_t aSrcRGBIndex
, uint32_t aSrcAIndex
,
511 uint32_t aDstRGBIndex
, uint32_t aDstAIndex
>
512 static void UnpremultiplyChunkFallback(const uint8_t*& aSrc
, uint8_t*& aDst
,
514 const uint8_t* end
= aSrc
+ 4 * aLength
;
516 uint8_t r
= aSrc
[aSrcRGBIndex
+ (aSwapRB
? 2 : 0)];
517 uint8_t g
= aSrc
[aSrcRGBIndex
+ 1];
518 uint8_t b
= aSrc
[aSrcRGBIndex
+ (aSwapRB
? 0 : 2)];
519 uint8_t a
= aSrc
[aSrcAIndex
];
521 // Access the 8.16 reciprocal from the table based on alpha. Multiply by
522 // the reciprocal and shift off the fraction bits to approximate the
523 // division by alpha.
524 uint32_t q
= sUnpremultiplyTable
[a
];
525 aDst
[aDstRGBIndex
+ 0] = (r
* q
) >> 16;
526 aDst
[aDstRGBIndex
+ 1] = (g
* q
) >> 16;
527 aDst
[aDstRGBIndex
+ 2] = (b
* q
) >> 16;
528 aDst
[aDstAIndex
] = a
;
532 } while (aSrc
< end
);
535 template <bool aSwapRB
, uint32_t aSrcRGBIndex
, uint32_t aSrcAIndex
,
536 uint32_t aDstRGBIndex
, uint32_t aDstAIndex
>
537 static void UnpremultiplyRowFallback(const uint8_t* aSrc
, uint8_t* aDst
,
539 UnpremultiplyChunkFallback
<aSwapRB
, aSrcRGBIndex
, aSrcAIndex
, aDstRGBIndex
,
540 aDstAIndex
>(aSrc
, aDst
, aLength
);
543 template <bool aSwapRB
, uint32_t aSrcRGBIndex
, uint32_t aSrcAIndex
,
544 uint32_t aDstRGBIndex
, uint32_t aDstAIndex
>
545 static void UnpremultiplyFallback(const uint8_t* aSrc
, int32_t aSrcGap
,
546 uint8_t* aDst
, int32_t aDstGap
,
548 for (int32_t height
= aSize
.height
; height
> 0; height
--) {
549 UnpremultiplyChunkFallback
<aSwapRB
, aSrcRGBIndex
, aSrcAIndex
, aDstRGBIndex
,
550 aDstAIndex
>(aSrc
, aDst
, aSize
.width
);
556 #define UNPREMULTIPLY_FALLBACK_CASE(aSrcFormat, aDstFormat) \
557 FORMAT_CASE(aSrcFormat, aDstFormat, \
558 UnpremultiplyFallback< \
559 ShouldSwapRB(aSrcFormat, aDstFormat), \
560 RGBByteIndex(aSrcFormat), AlphaByteIndex(aSrcFormat), \
561 RGBByteIndex(aDstFormat), AlphaByteIndex(aDstFormat)>)
563 #define UNPREMULTIPLY_FALLBACK(aSrcFormat) \
564 UNPREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8A8) \
565 UNPREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8A8) \
566 UNPREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::A8R8G8B8)
568 #define UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, aDstFormat) \
569 FORMAT_CASE_ROW(aSrcFormat, aDstFormat, \
570 UnpremultiplyRowFallback< \
571 ShouldSwapRB(aSrcFormat, aDstFormat), \
572 RGBByteIndex(aSrcFormat), AlphaByteIndex(aSrcFormat), \
573 RGBByteIndex(aDstFormat), AlphaByteIndex(aDstFormat)>)
575 #define UNPREMULTIPLY_ROW_FALLBACK(aSrcFormat) \
576 UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8A8) \
577 UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8A8) \
578 UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::A8R8G8B8)
580 bool UnpremultiplyData(const uint8_t* aSrc
, int32_t aSrcStride
,
581 SurfaceFormat aSrcFormat
, uint8_t* aDst
,
582 int32_t aDstStride
, SurfaceFormat aDstFormat
,
583 const IntSize
& aSize
) {
584 if (aSize
.IsEmpty()) {
587 IntSize size
= CollapseSize(aSize
, aSrcStride
, aDstStride
);
588 // Find gap from end of row to the start of the next row.
589 int32_t srcGap
= GetStrideGap(aSize
.width
, aSrcFormat
, aSrcStride
);
590 int32_t dstGap
= GetStrideGap(aSize
.width
, aDstFormat
, aDstStride
);
591 MOZ_ASSERT(srcGap
>= 0 && dstGap
>= 0);
592 if (srcGap
< 0 || dstGap
< 0) {
596 #define FORMAT_CASE_CALL(...) __VA_ARGS__(aSrc, srcGap, aDst, dstGap, size)
599 if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat
, aDstFormat
)) {
600 UNPREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8
, SurfaceFormat::B8G8R8A8
)
601 UNPREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8
, SurfaceFormat::R8G8B8A8
)
602 UNPREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8
, SurfaceFormat::R8G8B8A8
)
603 UNPREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8
, SurfaceFormat::B8G8R8A8
)
610 if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat
, aDstFormat
)) {
611 UNPREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8
, SurfaceFormat::B8G8R8A8
)
612 UNPREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8
, SurfaceFormat::R8G8B8A8
)
613 UNPREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8
, SurfaceFormat::R8G8B8A8
)
614 UNPREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8
, SurfaceFormat::B8G8R8A8
)
620 switch (FORMAT_KEY(aSrcFormat
, aDstFormat
)) {
621 UNPREMULTIPLY_FALLBACK(SurfaceFormat::B8G8R8A8
)
622 UNPREMULTIPLY_FALLBACK(SurfaceFormat::R8G8B8A8
)
623 UNPREMULTIPLY_FALLBACK(SurfaceFormat::A8R8G8B8
)
628 #undef FORMAT_CASE_CALL
630 MOZ_ASSERT(false, "Unsupported unpremultiply formats");
634 SwizzleRowFn
UnpremultiplyRow(SurfaceFormat aSrcFormat
,
635 SurfaceFormat aDstFormat
) {
637 if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat
, aDstFormat
)) {
638 UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8
, SurfaceFormat::B8G8R8A8
)
639 UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8
, SurfaceFormat::R8G8B8A8
)
640 UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8
, SurfaceFormat::R8G8B8A8
)
641 UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8
, SurfaceFormat::B8G8R8A8
)
648 if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat
, aDstFormat
)) {
649 UNPREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8
, SurfaceFormat::B8G8R8A8
)
650 UNPREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8
, SurfaceFormat::R8G8B8A8
)
651 UNPREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8
, SurfaceFormat::R8G8B8A8
)
652 UNPREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8
, SurfaceFormat::B8G8R8A8
)
658 switch (FORMAT_KEY(aSrcFormat
, aDstFormat
)) {
659 UNPREMULTIPLY_ROW_FALLBACK(SurfaceFormat::B8G8R8A8
)
660 UNPREMULTIPLY_ROW_FALLBACK(SurfaceFormat::R8G8B8A8
)
661 UNPREMULTIPLY_ROW_FALLBACK(SurfaceFormat::A8R8G8B8
)
666 MOZ_ASSERT_UNREACHABLE("Unsupported premultiply formats");
// Fallback swizzle implementation that uses shifting and masking to reorder
// pixels.
template <bool aSwapRB, bool aOpaqueAlpha, uint32_t aSrcRGBShift,
          uint32_t aSrcAShift, uint32_t aDstRGBShift, uint32_t aDstAShift>
static void SwizzleChunkFallback(const uint8_t*& aSrc, uint8_t*& aDst,
                                 int32_t aLength) {
  const uint8_t* end = aSrc + 4 * aLength;
  do {
    uint32_t rgba = *reinterpret_cast<const uint32_t*>(aSrc);

    if (aSwapRB) {
      // Handle R and B swaps by exchanging words and masking.
      uint32_t rb =
          ((rgba << 16) | (rgba >> 16)) & (0x00FF00FF << aSrcRGBShift);
      uint32_t ga = rgba & ((0xFF << aSrcAShift) | (0xFF00 << aSrcRGBShift));
      rgba = rb | ga;
    }

    // If src and dst shifts differ, rotate left or right to move RGB into
    // place, i.e. ARGB -> RGBA or ARGB -> RGBA.
    if (aDstRGBShift > aSrcRGBShift) {
      rgba = (rgba << 8) | (aOpaqueAlpha ? 0x000000FF : rgba >> 24);
    } else if (aSrcRGBShift > aDstRGBShift) {
      rgba = (rgba >> 8) | (aOpaqueAlpha ? 0xFF000000 : rgba << 24);
    } else if (aOpaqueAlpha) {
      rgba |= 0xFF << aDstAShift;
    }

    *reinterpret_cast<uint32_t*>(aDst) = rgba;

    aSrc += 4;
    aDst += 4;
  } while (aSrc < end);
}
709 template <bool aSwapRB
, bool aOpaqueAlpha
, uint32_t aSrcRGBShift
,
710 uint32_t aSrcAShift
, uint32_t aDstRGBShift
, uint32_t aDstAShift
>
711 static void SwizzleRowFallback(const uint8_t* aSrc
, uint8_t* aDst
,
713 SwizzleChunkFallback
<aSwapRB
, aOpaqueAlpha
, aSrcRGBShift
, aSrcAShift
,
714 aDstRGBShift
, aDstAShift
>(aSrc
, aDst
, aLength
);
717 template <bool aSwapRB
, bool aOpaqueAlpha
, uint32_t aSrcRGBShift
,
718 uint32_t aSrcAShift
, uint32_t aDstRGBShift
, uint32_t aDstAShift
>
719 static void SwizzleFallback(const uint8_t* aSrc
, int32_t aSrcGap
, uint8_t* aDst
,
720 int32_t aDstGap
, IntSize aSize
) {
721 for (int32_t height
= aSize
.height
; height
> 0; height
--) {
722 SwizzleChunkFallback
<aSwapRB
, aOpaqueAlpha
, aSrcRGBShift
, aSrcAShift
,
723 aDstRGBShift
, aDstAShift
>(aSrc
, aDst
, aSize
.width
);
729 #define SWIZZLE_FALLBACK(aSrcFormat, aDstFormat) \
731 aSrcFormat, aDstFormat, \
732 SwizzleFallback<ShouldSwapRB(aSrcFormat, aDstFormat), \
733 ShouldForceOpaque(aSrcFormat, aDstFormat), \
734 RGBBitShift(aSrcFormat), AlphaBitShift(aSrcFormat), \
735 RGBBitShift(aDstFormat), AlphaBitShift(aDstFormat)>)
737 #define SWIZZLE_ROW_FALLBACK(aSrcFormat, aDstFormat) \
739 aSrcFormat, aDstFormat, \
740 SwizzleRowFallback<ShouldSwapRB(aSrcFormat, aDstFormat), \
741 ShouldForceOpaque(aSrcFormat, aDstFormat), \
742 RGBBitShift(aSrcFormat), AlphaBitShift(aSrcFormat), \
743 RGBBitShift(aDstFormat), AlphaBitShift(aDstFormat)>)
// Fast-path for matching formats: a row conversion is just a copy (or a
// no-op when converting in place).
template <int32_t aBytesPerPixel>
static void SwizzleRowCopy(const uint8_t* aSrc, uint8_t* aDst,
                           int32_t aLength) {
  if (aSrc != aDst) {
    memcpy(aDst, aSrc, aLength * aBytesPerPixel);
  }
}
754 // Fast-path for matching formats.
755 static void SwizzleCopy(const uint8_t* aSrc
, int32_t aSrcGap
, uint8_t* aDst
,
756 int32_t aDstGap
, IntSize aSize
, int32_t aBPP
) {
758 int32_t rowLength
= aBPP
* aSize
.width
;
759 for (int32_t height
= aSize
.height
; height
> 0; height
--) {
760 memcpy(aDst
, aSrc
, rowLength
);
761 aSrc
+= rowLength
+ aSrcGap
;
762 aDst
+= rowLength
+ aDstGap
;
767 // Fast-path for conversions that swap all bytes.
768 template <bool aOpaqueAlpha
, uint32_t aSrcAShift
, uint32_t aDstAShift
>
769 static void SwizzleChunkSwap(const uint8_t*& aSrc
, uint8_t*& aDst
,
771 const uint8_t* end
= aSrc
+ 4 * aLength
;
773 // Use an endian swap to move the bytes, i.e. BGRA -> ARGB.
774 uint32_t rgba
= *reinterpret_cast<const uint32_t*>(aSrc
);
775 #if MOZ_LITTLE_ENDIAN()
776 rgba
= NativeEndian::swapToBigEndian(rgba
);
778 rgba
= NativeEndian::swapToLittleEndian(rgba
);
781 rgba
|= 0xFF << aDstAShift
;
783 *reinterpret_cast<uint32_t*>(aDst
) = rgba
;
786 } while (aSrc
< end
);
789 template <bool aOpaqueAlpha
, uint32_t aSrcAShift
, uint32_t aDstAShift
>
790 static void SwizzleRowSwap(const uint8_t* aSrc
, uint8_t* aDst
,
792 SwizzleChunkSwap
<aOpaqueAlpha
, aSrcAShift
, aDstAShift
>(aSrc
, aDst
, aLength
);
795 template <bool aOpaqueAlpha
, uint32_t aSrcAShift
, uint32_t aDstAShift
>
796 static void SwizzleSwap(const uint8_t* aSrc
, int32_t aSrcGap
, uint8_t* aDst
,
797 int32_t aDstGap
, IntSize aSize
) {
798 for (int32_t height
= aSize
.height
; height
> 0; height
--) {
799 SwizzleChunkSwap
<aOpaqueAlpha
, aSrcAShift
, aDstAShift
>(aSrc
, aDst
,
806 #define SWIZZLE_SWAP(aSrcFormat, aDstFormat) \
808 aSrcFormat, aDstFormat, \
809 SwizzleSwap<ShouldForceOpaque(aSrcFormat, aDstFormat), \
810 AlphaBitShift(aSrcFormat), AlphaBitShift(aDstFormat)>)
812 #define SWIZZLE_ROW_SWAP(aSrcFormat, aDstFormat) \
814 aSrcFormat, aDstFormat, \
815 SwizzleRowSwap<ShouldForceOpaque(aSrcFormat, aDstFormat), \
816 AlphaBitShift(aSrcFormat), AlphaBitShift(aDstFormat)>)
// Swaps the R and B components of tightly packed 3-byte RGB pixels,
// i.e. RGB <-> BGR.
static void SwizzleChunkSwapRGB24(const uint8_t*& aSrc, uint8_t*& aDst,
                                  int32_t aLength) {
  const uint8_t* end = aSrc + 3 * aLength;
  do {
    uint8_t r = aSrc[0];
    uint8_t g = aSrc[1];
    uint8_t b = aSrc[2];
    aDst[0] = b;
    aDst[1] = g;
    aDst[2] = r;
    aSrc += 3;
    aDst += 3;
  } while (aSrc < end);
}
833 static void SwizzleRowSwapRGB24(const uint8_t* aSrc
, uint8_t* aDst
,
835 SwizzleChunkSwapRGB24(aSrc
, aDst
, aLength
);
838 static void SwizzleSwapRGB24(const uint8_t* aSrc
, int32_t aSrcGap
,
839 uint8_t* aDst
, int32_t aDstGap
, IntSize aSize
) {
840 for (int32_t height
= aSize
.height
; height
> 0; height
--) {
841 SwizzleChunkSwapRGB24(aSrc
, aDst
, aSize
.width
);
847 #define SWIZZLE_SWAP_RGB24(aSrcFormat, aDstFormat) \
848 FORMAT_CASE(aSrcFormat, aDstFormat, SwizzleSwapRGB24)
850 #define SWIZZLE_ROW_SWAP_RGB24(aSrcFormat, aDstFormat) \
851 FORMAT_CASE_ROW(aSrcFormat, aDstFormat, SwizzleRowSwapRGB24)
// Fast-path for conversions that force alpha to opaque, updating in place.
template <uint32_t aDstAShift>
static void SwizzleChunkOpaqueUpdate(uint8_t*& aBuffer, int32_t aLength) {
  const uint8_t* end = aBuffer + 4 * aLength;
  do {
    uint32_t rgba = *reinterpret_cast<const uint32_t*>(aBuffer);
    // Just add on the alpha bits to the source.
    rgba |= 0xFF << aDstAShift;
    *reinterpret_cast<uint32_t*>(aBuffer) = rgba;
    aBuffer += 4;
  } while (aBuffer < end);
}
// Fast-path for conversions that force alpha to opaque while copying to a
// separate destination buffer.
template <uint32_t aDstAShift>
static void SwizzleChunkOpaqueCopy(const uint8_t*& aSrc, uint8_t* aDst,
                                   int32_t aLength) {
  const uint8_t* end = aSrc + 4 * aLength;
  do {
    uint32_t rgba = *reinterpret_cast<const uint32_t*>(aSrc);
    // Just add on the alpha bits to the source.
    rgba |= 0xFF << aDstAShift;
    *reinterpret_cast<uint32_t*>(aDst) = rgba;
    aSrc += 4;
    aDst += 4;
  } while (aSrc < end);
}
880 template <uint32_t aDstAShift
>
881 static void SwizzleRowOpaque(const uint8_t* aSrc
, uint8_t* aDst
,
884 SwizzleChunkOpaqueUpdate
<aDstAShift
>(aDst
, aLength
);
886 SwizzleChunkOpaqueCopy
<aDstAShift
>(aSrc
, aDst
, aLength
);
890 template <uint32_t aDstAShift
>
891 static void SwizzleOpaque(const uint8_t* aSrc
, int32_t aSrcGap
, uint8_t* aDst
,
892 int32_t aDstGap
, IntSize aSize
) {
894 // Modifying in-place, so just write out the alpha.
895 for (int32_t height
= aSize
.height
; height
> 0; height
--) {
896 SwizzleChunkOpaqueUpdate
<aDstAShift
>(aDst
, aSize
.width
);
900 for (int32_t height
= aSize
.height
; height
> 0; height
--) {
901 SwizzleChunkOpaqueCopy
<aDstAShift
>(aSrc
, aDst
, aSize
.width
);
908 #define SWIZZLE_OPAQUE(aSrcFormat, aDstFormat) \
909 FORMAT_CASE(aSrcFormat, aDstFormat, SwizzleOpaque<AlphaBitShift(aDstFormat)>)
911 #define SWIZZLE_ROW_OPAQUE(aSrcFormat, aDstFormat) \
912 FORMAT_CASE_ROW(aSrcFormat, aDstFormat, \
913 SwizzleRowOpaque<AlphaBitShift(aDstFormat)>)
// Packing of 32-bit formats to RGB565.
// Each 8-bit component is truncated to its top bits (5/6/5 for R/G/B) and
// packed into a 16-bit word. aSrcRGBShift is the bit offset of the RGB
// components within the source 32-bit pixel (0 or 8 for leading-alpha).
template <bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex>
static void PackToRGB565(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
                         int32_t aDstGap, IntSize aSize) {
  for (int32_t height = aSize.height; height > 0; height--) {
    const uint8_t* end = aSrc + 4 * aSize.width;
    do {
      uint32_t rgba = *reinterpret_cast<const uint32_t*>(aSrc);

      // Isolate the R, G, and B components and shift to final endian-dependent
      // locations.
      uint16_t rgb565;
      if (aSwapRB) {
        // Swapped order: low byte becomes the high 5 bits of the 565 word.
        rgb565 = ((rgba & (0xF8 << aSrcRGBShift)) << (8 - aSrcRGBShift)) |
                 ((rgba & (0xFC00 << aSrcRGBShift)) >> (5 + aSrcRGBShift)) |
                 ((rgba & (0xF80000 << aSrcRGBShift)) >> (19 + aSrcRGBShift));
      } else {
        rgb565 = ((rgba & (0xF8 << aSrcRGBShift)) >> (3 + aSrcRGBShift)) |
                 ((rgba & (0xFC00 << aSrcRGBShift)) >> (5 + aSrcRGBShift)) |
                 ((rgba & (0xF80000 << aSrcRGBShift)) >> (8 + aSrcRGBShift));
      }

      *reinterpret_cast<uint16_t*>(aDst) = rgb565;

      aSrc += 4;
      aDst += 2;
    } while (aSrc < end);

    aSrc += aSrcGap;
    aDst += aDstGap;
  }
}
// Packing of 32-bit formats to 24-bit formats.
// Consumes aLength 32-bit pixels from aSrc and emits 3 bytes (R, G, B in
// destination order) per pixel to aDst. Both pointers are taken by reference
// and left pointing just past the converted data, so callers can continue
// packing subsequent chunks. Assumes aLength > 0 (do-while).
template <bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex>
static void PackChunkToRGB24(const uint8_t*& aSrc, uint8_t*& aDst,
                             int32_t aLength) {
  const uint8_t* sentinel = aSrc + 4 * aLength;
  do {
    // aSrcRGBIndex skips a leading alpha/padding byte when present;
    // aSwapRB flips which end of the pixel holds red vs. blue.
    const uint8_t* px = aSrc + aSrcRGBIndex;
    aDst[0] = px[aSwapRB ? 2 : 0];
    aDst[1] = px[1];
    aDst[2] = px[aSwapRB ? 0 : 2];

    aSrc += 4;
    aDst += 3;
  } while (aSrc < sentinel);
}
// Row-level entry point for 32-bit -> 24-bit packing (SwizzleRowFn shape).
// Local pointer copies are passed by reference into the chunk helper and
// discarded afterwards.
template <bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex>
static void PackRowToRGB24(const uint8_t* aSrc, uint8_t* aDst,
                           int32_t aLength) {
  PackChunkToRGB24<aSwapRB, aSrcRGBShift, aSrcRGBIndex>(aSrc, aDst, aLength);
}
// Surface-wide 32-bit -> 24-bit packing: one chunk per row, then skip the
// stride gaps between rows.
template <bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex>
static void PackToRGB24(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
                        int32_t aDstGap, IntSize aSize) {
  for (int32_t height = aSize.height; height > 0; height--) {
    PackChunkToRGB24<aSwapRB, aSrcRGBShift, aSrcRGBIndex>(aSrc, aDst,
                                                          aSize.width);
    aSrc += aSrcGap;
    aDst += aDstGap;
  }
}
// Dispatch-table helpers for packing: instantiate the pack function with the
// compile-time swap/shift/index attributes of each supported 32-bit source.
#define PACK_RGB_CASE(aSrcFormat, aDstFormat, aPackFunc)      \
  FORMAT_CASE(aSrcFormat, aDstFormat,                         \
              aPackFunc<ShouldSwapRB(aSrcFormat, aDstFormat), \
                        RGBBitShift(aSrcFormat), RGBByteIndex(aSrcFormat)>)

#define PACK_RGB(aDstFormat, aPackFunc)                           \
  PACK_RGB_CASE(SurfaceFormat::B8G8R8A8, aDstFormat, aPackFunc)   \
  PACK_RGB_CASE(SurfaceFormat::B8G8R8X8, aDstFormat, aPackFunc)   \
  PACK_RGB_CASE(SurfaceFormat::R8G8B8A8, aDstFormat, aPackFunc)   \
  PACK_RGB_CASE(SurfaceFormat::R8G8B8X8, aDstFormat, aPackFunc)   \
  PACK_RGB_CASE(SurfaceFormat::A8R8G8B8, aDstFormat, aPackFunc)   \
  PACK_RGB_CASE(SurfaceFormat::X8R8G8B8, aDstFormat, aPackFunc)

// Row-function analogue of PACK_RGB_CASE for the SwizzleRow dispatch table.
#define PACK_ROW_RGB_CASE(aSrcFormat, aDstFormat, aPackFunc)                   \
  FORMAT_CASE_ROW(                                                             \
      aSrcFormat, aDstFormat,                                                  \
      aPackFunc<ShouldSwapRB(aSrcFormat, aDstFormat), RGBBitShift(aSrcFormat), \
                RGBByteIndex(aSrcFormat)>)

#define PACK_ROW_RGB(aDstFormat, aPackFunc)                           \
  PACK_ROW_RGB_CASE(SurfaceFormat::B8G8R8A8, aDstFormat, aPackFunc)   \
  PACK_ROW_RGB_CASE(SurfaceFormat::B8G8R8X8, aDstFormat, aPackFunc)   \
  PACK_ROW_RGB_CASE(SurfaceFormat::R8G8B8A8, aDstFormat, aPackFunc)   \
  PACK_ROW_RGB_CASE(SurfaceFormat::R8G8B8X8, aDstFormat, aPackFunc)   \
  PACK_ROW_RGB_CASE(SurfaceFormat::A8R8G8B8, aDstFormat, aPackFunc)   \
  PACK_ROW_RGB_CASE(SurfaceFormat::X8R8G8B8, aDstFormat, aPackFunc)
// Packing of 32-bit formats to A8.
// aSrcAIndex is the byte offset of the alpha component within each 32-bit
// source pixel; everything else is discarded.
template <uint32_t aSrcAIndex>
static void PackToA8(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
                     int32_t aDstGap, IntSize aSize) {
  for (int32_t height = aSize.height; height > 0; height--) {
    const uint8_t* end = aSrc + 4 * aSize.width;
    do {
      // Keep only the alpha byte of this pixel.
      *aDst++ = aSrc[aSrcAIndex];
      aSrc += 4;
    } while (aSrc < end);
    aSrc += aSrcGap;
    aDst += aDstGap;
  }
}
// Dispatch-table helpers for alpha extraction; only formats that actually
// carry alpha are listed.
#define PACK_ALPHA_CASE(aSrcFormat, aDstFormat, aPackFunc) \
  FORMAT_CASE(aSrcFormat, aDstFormat, aPackFunc<AlphaByteIndex(aSrcFormat)>)

#define PACK_ALPHA(aDstFormat, aPackFunc)                         \
  PACK_ALPHA_CASE(SurfaceFormat::B8G8R8A8, aDstFormat, aPackFunc) \
  PACK_ALPHA_CASE(SurfaceFormat::R8G8B8A8, aDstFormat, aPackFunc) \
  PACK_ALPHA_CASE(SurfaceFormat::A8R8G8B8, aDstFormat, aPackFunc)
1034 template <bool aSwapRB
>
1035 void UnpackRowRGB24(const uint8_t* aSrc
, uint8_t* aDst
, int32_t aLength
) {
1036 // Because we are expanding, we can only process the data back to front in
1037 // case we are performing this in place.
1038 const uint8_t* src
= aSrc
+ 3 * (aLength
- 1);
1039 uint32_t* dst
= reinterpret_cast<uint32_t*>(aDst
+ 4 * aLength
);
1040 while (src
>= aSrc
) {
1041 uint8_t r
= src
[aSwapRB
? 2 : 0];
1043 uint8_t b
= src
[aSwapRB
? 0 : 2];
1044 #if MOZ_LITTLE_ENDIAN()
1045 *--dst
= 0xFF000000 | (b
<< 16) | (g
<< 8) | r
;
1047 *--dst
= 0x000000FF | (b
<< 8) | (g
<< 16) | (r
<< 24);
1053 // Force instantiation of swizzle variants here.
1054 template void UnpackRowRGB24
<false>(const uint8_t*, uint8_t*, int32_t);
1055 template void UnpackRowRGB24
<true>(const uint8_t*, uint8_t*, int32_t);
// Dispatch-table entry for unpacking R8G8B8 rows into a 32-bit destination.
#define UNPACK_ROW_RGB(aDstFormat)                           \
  FORMAT_CASE_ROW(                                           \
      SurfaceFormat::R8G8B8, aDstFormat,                     \
      UnpackRowRGB24<ShouldSwapRB(SurfaceFormat::R8G8B8, aDstFormat)>)
1062 static void UnpackRowRGB24_To_ARGB(const uint8_t* aSrc
, uint8_t* aDst
,
1064 // Because we are expanding, we can only process the data back to front in
1065 // case we are performing this in place.
1066 const uint8_t* src
= aSrc
+ 3 * (aLength
- 1);
1067 uint32_t* dst
= reinterpret_cast<uint32_t*>(aDst
+ 4 * aLength
);
1068 while (src
>= aSrc
) {
1072 #if MOZ_LITTLE_ENDIAN()
1073 *--dst
= 0x000000FF | (r
<< 8) | (g
<< 16) | (b
<< 24);
1075 *--dst
= 0xFF000000 | (r
<< 24) | (g
<< 16) | b
;
// Dispatch-table entry for unpacking R8G8B8 rows into leading-alpha formats.
#define UNPACK_ROW_RGB_TO_ARGB(aDstFormat) \
  FORMAT_CASE_ROW(SurfaceFormat::R8G8B8, aDstFormat, UnpackRowRGB24_To_ARGB)
// Converts a whole surface from aSrcFormat to aDstFormat, trying SIMD
// (SSE2/NEON) variants first and falling back to the portable routines.
// Returns true on success; presumably false when the format pair is
// unsupported — confirm against the header's contract.
bool SwizzleData(const uint8_t* aSrc, int32_t aSrcStride,
                 SurfaceFormat aSrcFormat, uint8_t* aDst, int32_t aDstStride,
                 SurfaceFormat aDstFormat, const IntSize& aSize) {
  if (aSize.IsEmpty()) {
    return true;
  }
  // Collapse the surface to a single "row" when strides allow, so the inner
  // loops run over one long contiguous span.
  IntSize size = CollapseSize(aSize, aSrcStride, aDstStride);
  // Find gap from end of row to the start of the next row.
  int32_t srcGap = GetStrideGap(aSize.width, aSrcFormat, aSrcStride);
  int32_t dstGap = GetStrideGap(aSize.width, aDstFormat, aDstStride);
  MOZ_ASSERT(srcGap >= 0 && dstGap >= 0);
  if (srcGap < 0 || dstGap < 0) {
    return false;
  }

  // FORMAT_CASE uses this to invoke the selected swizzle with the locals
  // computed above.
#define FORMAT_CASE_CALL(...) __VA_ARGS__(aSrc, srcGap, aDst, dstGap, size)

#ifdef USE_SSE2
  if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      SWIZZLE_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_SSE2(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_SSE2(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      SWIZZLE_SSE2(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_SSE2(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8)
      default:
        break;
    }
#endif

#ifdef USE_NEON
  if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      SWIZZLE_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_NEON(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_NEON(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      SWIZZLE_NEON(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_NEON(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8)
      default:
        break;
    }
#endif

  // Portable fallbacks: RB swaps, alpha-position swaps, opaque fills,
  // and packing down to RGB565 / 24-bit / A8.
  switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
    SWIZZLE_FALLBACK(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_FALLBACK(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_FALLBACK(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_FALLBACK(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8)

    SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::A8R8G8B8)
    SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::X8R8G8B8)

    SWIZZLE_FALLBACK(SurfaceFormat::A8R8G8B8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_FALLBACK(SurfaceFormat::X8R8G8B8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_FALLBACK(SurfaceFormat::A8R8G8B8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_FALLBACK(SurfaceFormat::X8R8G8B8, SurfaceFormat::R8G8B8A8)

    SWIZZLE_SWAP(SurfaceFormat::B8G8R8A8, SurfaceFormat::A8R8G8B8)
    SWIZZLE_SWAP(SurfaceFormat::B8G8R8A8, SurfaceFormat::X8R8G8B8)
    SWIZZLE_SWAP(SurfaceFormat::B8G8R8X8, SurfaceFormat::X8R8G8B8)
    SWIZZLE_SWAP(SurfaceFormat::B8G8R8X8, SurfaceFormat::A8R8G8B8)
    SWIZZLE_SWAP(SurfaceFormat::A8R8G8B8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_SWAP(SurfaceFormat::A8R8G8B8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8A8)

    SWIZZLE_SWAP_RGB24(SurfaceFormat::R8G8B8, SurfaceFormat::B8G8R8)
    SWIZZLE_SWAP_RGB24(SurfaceFormat::B8G8R8, SurfaceFormat::R8G8B8)

    SWIZZLE_OPAQUE(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_OPAQUE(SurfaceFormat::B8G8R8X8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_OPAQUE(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_OPAQUE(SurfaceFormat::R8G8B8X8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_OPAQUE(SurfaceFormat::A8R8G8B8, SurfaceFormat::X8R8G8B8)
    SWIZZLE_OPAQUE(SurfaceFormat::X8R8G8B8, SurfaceFormat::A8R8G8B8)

    PACK_RGB(SurfaceFormat::R5G6B5_UINT16, PackToRGB565)
    PACK_RGB(SurfaceFormat::B8G8R8, PackToRGB24)
    PACK_RGB(SurfaceFormat::R8G8B8, PackToRGB24)
    PACK_ALPHA(SurfaceFormat::A8, PackToA8)

    default:
      break;
  }

  if (aSrcFormat == aDstFormat) {
    // If the formats match, just do a generic copy.
    SwizzleCopy(aSrc, srcGap, aDst, dstGap, size, BytesPerPixel(aSrcFormat));
    return true;
  }

#undef FORMAT_CASE_CALL

  MOZ_ASSERT(false, "Unsupported swizzle formats");
  return false;
}
// Returns a row-conversion function for the given format pair, preferring the
// widest available SIMD implementation (AVX2 > SSSE3 > SSE2 > NEON) before
// the portable fallbacks. Presumably returns null for unsupported pairs —
// confirm against the header's contract.
SwizzleRowFn SwizzleRow(SurfaceFormat aSrcFormat, SurfaceFormat aDstFormat) {
#ifdef USE_SSE2
  if (mozilla::supports_avx2()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      UNPACK_ROW_RGB_AVX2(SurfaceFormat::R8G8B8X8)
      UNPACK_ROW_RGB_AVX2(SurfaceFormat::R8G8B8A8)
      UNPACK_ROW_RGB_AVX2(SurfaceFormat::B8G8R8X8)
      UNPACK_ROW_RGB_AVX2(SurfaceFormat::B8G8R8A8)
      default:
        break;
    }

  if (mozilla::supports_ssse3()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      UNPACK_ROW_RGB_SSSE3(SurfaceFormat::R8G8B8X8)
      UNPACK_ROW_RGB_SSSE3(SurfaceFormat::R8G8B8A8)
      UNPACK_ROW_RGB_SSSE3(SurfaceFormat::B8G8R8X8)
      UNPACK_ROW_RGB_SSSE3(SurfaceFormat::B8G8R8A8)
      default:
        break;
    }

  if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      SWIZZLE_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_ROW_SSE2(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_ROW_SSE2(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      SWIZZLE_ROW_SSE2(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_ROW_SSE2(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8)
      default:
        break;
    }
#endif

#ifdef USE_NEON
  if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      UNPACK_ROW_RGB_NEON(SurfaceFormat::R8G8B8X8)
      UNPACK_ROW_RGB_NEON(SurfaceFormat::R8G8B8A8)
      UNPACK_ROW_RGB_NEON(SurfaceFormat::B8G8R8X8)
      UNPACK_ROW_RGB_NEON(SurfaceFormat::B8G8R8A8)
      SWIZZLE_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_ROW_NEON(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
      SWIZZLE_ROW_NEON(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8)
      SWIZZLE_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      SWIZZLE_ROW_NEON(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
      SWIZZLE_ROW_NEON(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8)
      default:
        break;
    }
#endif

  switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8)

    SWIZZLE_ROW_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::A8R8G8B8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::X8R8G8B8)

    SWIZZLE_ROW_FALLBACK(SurfaceFormat::A8R8G8B8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::X8R8G8B8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::A8R8G8B8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_ROW_FALLBACK(SurfaceFormat::X8R8G8B8, SurfaceFormat::R8G8B8A8)

    SWIZZLE_ROW_OPAQUE(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_ROW_OPAQUE(SurfaceFormat::B8G8R8X8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_ROW_OPAQUE(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_ROW_OPAQUE(SurfaceFormat::R8G8B8X8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_ROW_OPAQUE(SurfaceFormat::A8R8G8B8, SurfaceFormat::X8R8G8B8)
    SWIZZLE_ROW_OPAQUE(SurfaceFormat::X8R8G8B8, SurfaceFormat::A8R8G8B8)

    SWIZZLE_ROW_SWAP(SurfaceFormat::B8G8R8A8, SurfaceFormat::A8R8G8B8)
    SWIZZLE_ROW_SWAP(SurfaceFormat::B8G8R8A8, SurfaceFormat::X8R8G8B8)
    SWIZZLE_ROW_SWAP(SurfaceFormat::B8G8R8X8, SurfaceFormat::X8R8G8B8)
    SWIZZLE_ROW_SWAP(SurfaceFormat::B8G8R8X8, SurfaceFormat::A8R8G8B8)
    SWIZZLE_ROW_SWAP(SurfaceFormat::A8R8G8B8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_ROW_SWAP(SurfaceFormat::A8R8G8B8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_ROW_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_ROW_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8A8)

    SWIZZLE_ROW_SWAP_RGB24(SurfaceFormat::R8G8B8, SurfaceFormat::B8G8R8)
    SWIZZLE_ROW_SWAP_RGB24(SurfaceFormat::B8G8R8, SurfaceFormat::R8G8B8)

    UNPACK_ROW_RGB(SurfaceFormat::R8G8B8X8)
    UNPACK_ROW_RGB(SurfaceFormat::R8G8B8A8)
    UNPACK_ROW_RGB(SurfaceFormat::B8G8R8X8)
    UNPACK_ROW_RGB(SurfaceFormat::B8G8R8A8)
    UNPACK_ROW_RGB_TO_ARGB(SurfaceFormat::A8R8G8B8)
    UNPACK_ROW_RGB_TO_ARGB(SurfaceFormat::X8R8G8B8)

    PACK_ROW_RGB(SurfaceFormat::R8G8B8, PackRowToRGB24)
    PACK_ROW_RGB(SurfaceFormat::B8G8R8, PackRowToRGB24)

    default:
      break;
  }

  if (aSrcFormat == aDstFormat) {
    // Matching formats: a plain per-pixel copy sized by the pixel width.
    switch (BytesPerPixel(aSrcFormat)) {
      case 4:
        return &SwizzleRowCopy<4>;
      case 3:
        return &SwizzleRowCopy<3>;
      default:
        break;
    }
  }

  MOZ_ASSERT_UNREACHABLE("Unsupported swizzle formats");
  return nullptr;
}
1309 } // namespace mozilla