1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
10 #include <tmmintrin.h>
namespace mozilla::gfx {

// Row unpacker for packed 3-byte pixels, declared here and defined elsewhere.
// It is used below as the fallback for the pixels that the vectorized path
// must not touch. NOTE(review): presumably the portable scalar
// implementation -- confirm against its definition.
template <bool aSwapRB>
void UnpackRowRGB24(const uint8_t* aSrc, uint8_t* aDst, int32_t aLength);

// Expands a row of aLength packed 3-byte pixels at aSrc into 4-byte pixels at
// aDst using SSSE3 byte shuffles. The fourth byte of every output pixel is
// forced to 0xFF. When aSwapRB is true, the first and third byte of each pixel
// are exchanged while expanding.
//
// The row is converted back to front so that the conversion stays correct
// when aSrc and aDst refer to the same buffer.
template <bool aSwapRB>
void UnpackRowRGB24_SSSE3(const uint8_t* aSrc, uint8_t* aDst, int32_t aLength) {
  // Because this implementation will read an additional 4 bytes of data that
  // is ignored and masked over, we cannot use the accelerated version for the
  // last 1-5 pixels (3-15 bytes remaining) to guarantee we don't access memory
  // outside the buffer (we read in 16 byte chunks). Rows that short are handed
  // to the fallback in their entirety.
  if (aLength < 6) {
    UnpackRowRGB24<aSwapRB>(aSrc, aDst, aLength);
    return;
  }

  // Because we are expanding, we can only process the data back to front in
  // case we are performing this in place. alignedRow is the number of leading
  // pixels handled by the vector loop (a multiple of 4); remainder is the 2-5
  // trailing pixels left for the fallback.
  const int32_t alignedRow = (aLength - 2) & ~3;
  const int32_t remainder = aLength - alignedRow;

  const uint8_t* src = aSrc + alignedRow * 3;
  uint8_t* dst = aDst + alignedRow * 4;

  // Handle 2-5 remaining pixels.
  UnpackRowRGB24<aSwapRB>(src, dst, remainder);

  // Shuffle control: output byte i is taken from input byte mask[i]
  // (_mm_set_epi8 lists bytes from most to least significant). Each group of
  // three source bytes is spread into a 4-byte slot; the fourth byte pulls in
  // one of the over-read bytes 12-15 and is overwritten with 0xFF below.
  const __m128i mask =
      aSwapRB
          ? _mm_set_epi8(15, 9, 10, 11, 14, 6, 7, 8, 13, 3, 4, 5, 12, 0, 1, 2)
          : _mm_set_epi8(15, 11, 10, 9, 14, 8, 7, 6, 13, 5, 4, 3, 12, 2, 1, 0);

  const __m128i alpha = _mm_set1_epi32(0xFF000000);

  // Process all 4-pixel chunks as one vector. src starts exactly
  // alignedRow * 3 bytes past aSrc, which is a multiple of the 12-byte step,
  // so stepping back before each load ends precisely at aSrc and never forms
  // a pointer in front of the buffer.
  while (src > aSrc) {
    src -= 4 * 3;
    dst -= 4 * 4;

    __m128i px = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src));
    px = _mm_shuffle_epi8(px, mask);
    px = _mm_or_si128(px, alpha);
    _mm_storeu_si128(reinterpret_cast<__m128i*>(dst), px);
  }
}

// Force instantiation of swizzle variants here.
template void UnpackRowRGB24_SSSE3<false>(const uint8_t*, uint8_t*, int32_t);
template void UnpackRowRGB24_SSSE3<true>(const uint8_t*, uint8_t*, int32_t);

}  // namespace mozilla::gfx