Update configs. IGNORE BROKEN CHANGESETS CLOSED TREE NO BUG a=release ba=release
[gecko.git] / gfx / thebes / gfxAlphaRecoveryGeneric.h
blob84db0fea0e177ff9743a16b41ec6a1a7a525871a
/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 #ifndef _GFXALPHARECOVERY_GENERIC_H_
6 #define _GFXALPHARECOVERY_GENERIC_H_
8 #include "gfxAlphaRecovery.h"
9 #include "gfxImageSurface.h"
10 #include "nsDebug.h"
11 #include <xsimd/xsimd.hpp>
13 template <typename Arch>
14 bool gfxAlphaRecovery::RecoverAlphaGeneric(gfxImageSurface* blackSurf,
15 const gfxImageSurface* whiteSurf) {
16 mozilla::gfx::IntSize size = blackSurf->GetSize();
18 if (size != whiteSurf->GetSize() ||
19 (blackSurf->Format() != mozilla::gfx::SurfaceFormat::A8R8G8B8_UINT32 &&
20 blackSurf->Format() != mozilla::gfx::SurfaceFormat::X8R8G8B8_UINT32) ||
21 (whiteSurf->Format() != mozilla::gfx::SurfaceFormat::A8R8G8B8_UINT32 &&
22 whiteSurf->Format() != mozilla::gfx::SurfaceFormat::X8R8G8B8_UINT32))
23 return false;
25 blackSurf->Flush();
26 whiteSurf->Flush();
28 unsigned char* blackData = blackSurf->Data();
29 unsigned char* whiteData = whiteSurf->Data();
31 if ((NS_PTR_TO_UINT32(blackData) & 0xf) !=
32 (NS_PTR_TO_UINT32(whiteData) & 0xf) ||
33 (blackSurf->Stride() - whiteSurf->Stride()) & 0xf) {
34 // Cannot keep these in alignment.
35 return false;
38 alignas(Arch::alignment()) static const uint8_t greenMaski[] = {
39 0x00, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00,
40 0x00, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00,
42 alignas(Arch::alignment()) static const uint8_t alphaMaski[] = {
43 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff,
44 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff,
47 using batch_type = xsimd::batch<uint8_t, Arch>;
48 constexpr size_t batch_size = batch_type::size;
49 static_assert(batch_size == 16);
51 batch_type greenMask = batch_type::load_aligned(greenMaski);
52 batch_type alphaMask = batch_type::load_aligned(alphaMaski);
54 for (int32_t i = 0; i < size.height; ++i) {
55 int32_t j = 0;
56 // Loop single pixels until at 4 byte alignment.
57 while (NS_PTR_TO_UINT32(blackData) & 0xf && j < size.width) {
58 *((uint32_t*)blackData) =
59 RecoverPixel(*reinterpret_cast<uint32_t*>(blackData),
60 *reinterpret_cast<uint32_t*>(whiteData));
61 blackData += 4;
62 whiteData += 4;
63 j++;
65 // This extra loop allows the compiler to do some more clever registry
66 // management and makes it about 5% faster than with only the 4 pixel
67 // at a time loop.
68 for (; j < size.width - 8; j += 8) {
69 auto black1 = batch_type::load_aligned(blackData);
70 auto white1 = batch_type::load_aligned(whiteData);
71 auto black2 = batch_type::load_aligned(blackData + batch_size);
72 auto white2 = batch_type::load_aligned(whiteData + batch_size);
74 // Execute the same instructions as described in RecoverPixel, only
75 // using an SSE2 packed saturated subtract.
76 white1 = xsimd::ssub(white1, black1);
77 white2 = xsimd::ssub(white2, black2);
78 white1 = xsimd::ssub(greenMask, white1);
79 white2 = xsimd::ssub(greenMask, white2);
80 // Producing the final black pixel in an XMM register and storing
81 // that is actually faster than doing a masked store since that
82 // does an unaligned storage. We have the black pixel in a register
83 // anyway.
84 black1 = xsimd::bitwise_andnot(black1, alphaMask);
85 black2 = xsimd::bitwise_andnot(black2, alphaMask);
86 white1 = xsimd::slide_left<2>(white1);
87 white2 = xsimd::slide_left<2>(white2);
88 white1 &= alphaMask;
89 white2 &= alphaMask;
90 black1 |= white1;
91 black2 |= white2;
93 black1.store_aligned(blackData);
94 black2.store_aligned(blackData + batch_size);
95 blackData += 2 * batch_size;
96 whiteData += 2 * batch_size;
98 for (; j < size.width - 4; j += 4) {
99 auto black = batch_type::load_aligned(blackData);
100 auto white = batch_type::load_aligned(whiteData);
102 white = xsimd::ssub(white, black);
103 white = xsimd::ssub(greenMask, white);
104 black = xsimd::bitwise_andnot(black, alphaMask);
105 white = xsimd::slide_left<2>(white);
106 white &= alphaMask;
107 black |= white;
108 black.store_aligned(blackData);
109 blackData += batch_size;
110 whiteData += batch_size;
112 // Loop single pixels until we're done.
113 while (j < size.width) {
114 *((uint32_t*)blackData) =
115 RecoverPixel(*reinterpret_cast<uint32_t*>(blackData),
116 *reinterpret_cast<uint32_t*>(whiteData));
117 blackData += 4;
118 whiteData += 4;
119 j++;
121 blackData += blackSurf->Stride() - j * 4;
122 whiteData += whiteSurf->Stride() - j * 4;
125 blackSurf->MarkDirty();
127 return true;
129 #endif