Re-sync with internal repository
[hiphop-php.git] / third-party / folly / src / folly / hash / detail / Crc32CombineDetail.cpp
blob806910a383edbe3e7276809a12356859939d17cb
1 /*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include <array>
18 #include <utility>
20 #include <folly/Bits.h>
21 #include <folly/hash/detail/ChecksumDetail.h>
23 namespace folly {
25 // Standard galois-field multiply. The only modification is that a,
26 // b, m, and p are all bit-reflected.
28 // https://en.wikipedia.org/wiki/Finite_field_arithmetic
29 static constexpr uint32_t gf_multiply_sw_1(
30 size_t i, uint32_t p, uint32_t a, uint32_t b, uint32_t m) {
31 // clang-format off
32 return i == 32 ? p : gf_multiply_sw_1(
33 /* i = */ i + 1,
34 /* p = */ p ^ (-((b >> 31) & 1) & a),
35 /* a = */ (a >> 1) ^ (-(a & 1) & m),
36 /* b = */ b << 1,
37 /* m = */ m);
38 // clang-format on
40 static constexpr uint32_t gf_multiply_sw(uint32_t a, uint32_t b, uint32_t m) {
41 return gf_multiply_sw_1(/* i = */ 0, /* p = */ 0, a, b, m);
44 static constexpr uint32_t gf_square_sw(uint32_t a, uint32_t m) {
45 return gf_multiply_sw(a, a, m);
48 namespace {
50 template <size_t i, uint32_t m>
51 struct gf_powers_memo {
52 static constexpr uint32_t value =
53 gf_square_sw(gf_powers_memo<i - 1, m>::value, m);
55 template <uint32_t m>
56 struct gf_powers_memo<0, m> {
57 static constexpr uint32_t value = m;
60 template <uint32_t m>
61 struct gf_powers_make {
62 template <size_t... i>
63 constexpr auto operator()(std::index_sequence<i...>) const {
64 return std::array<uint32_t, sizeof...(i)>{{gf_powers_memo<i, m>::value...}};
68 } // namespace
70 #if FOLLY_SSE_PREREQ(4, 2)
72 // Reduction taken from
73 // https://www.nicst.de/crc.pdf
75 // This is an intrinsics-based implementation of listing 3.
76 static uint32_t gf_multiply_crc32c_hw(uint64_t crc1, uint64_t crc2, uint32_t) {
77 const auto crc1_xmm = _mm_set_epi64x(0, crc1);
78 const auto crc2_xmm = _mm_set_epi64x(0, crc2);
79 const auto count = _mm_set_epi64x(0, 1);
80 const auto res0 = _mm_clmulepi64_si128(crc2_xmm, crc1_xmm, 0x00);
81 const auto res1 = _mm_sll_epi64(res0, count);
83 // Use hardware crc32c to do reduction from 64 -> 32 bytes
84 const auto res2 = _mm_cvtsi128_si64(res1);
85 const auto res3 = _mm_crc32_u32(0, res2);
86 const auto res4 = _mm_extract_epi32(res1, 1);
87 return res3 ^ res4;
90 static uint32_t gf_multiply_crc32_hw(uint64_t crc1, uint64_t crc2, uint32_t) {
91 const auto crc1_xmm = _mm_set_epi64x(0, crc1);
92 const auto crc2_xmm = _mm_set_epi64x(0, crc2);
93 const auto count = _mm_set_epi64x(0, 1);
94 const auto res0 = _mm_clmulepi64_si128(crc2_xmm, crc1_xmm, 0x00);
95 const auto res1 = _mm_sll_epi64(res0, count);
97 // Do barrett reduction of 64 -> 32 bytes
98 const auto mask32 = _mm_set_epi32(0, 0, 0, 0xFFFFFFFF);
99 const auto barrett_reduction_constants =
100 _mm_set_epi32(0x1, 0xDB710641, 0x1, 0xF7011641);
101 const auto res2 = _mm_clmulepi64_si128(
102 _mm_and_si128(res1, mask32), barrett_reduction_constants, 0x00);
103 const auto res3 = _mm_clmulepi64_si128(
104 _mm_and_si128(res2, mask32), barrett_reduction_constants, 0x10);
105 return _mm_cvtsi128_si32(_mm_srli_si128(_mm_xor_si128(res3, res1), 4));
108 #else
110 static uint32_t gf_multiply_crc32c_hw(uint64_t, uint64_t, uint32_t) {
111 return 0;
113 static uint32_t gf_multiply_crc32_hw(uint64_t, uint64_t, uint32_t) {
114 return 0;
117 #endif
119 static constexpr uint32_t crc32c_m = 0x82f63b78;
120 static constexpr uint32_t crc32_m = 0xedb88320;
123 * Pre-calculated powers tables for crc32c and crc32.
125 static constexpr std::array<uint32_t, 62> const crc32c_powers =
126 gf_powers_make<crc32c_m>{}(std::make_index_sequence<62>{});
127 static constexpr std::array<uint32_t, 62> const crc32_powers =
128 gf_powers_make<crc32_m>{}(std::make_index_sequence<62>{});
130 template <typename F>
131 static uint32_t crc32_append_zeroes(
132 F mult,
133 uint32_t crc,
134 size_t len,
135 uint32_t polynomial,
136 std::array<uint32_t, 62> const& powers_array) {
137 auto powers = powers_array.data();
139 // Append by multiplying by consecutive powers of two of the zeroes
140 // array
141 len >>= 2;
143 while (len) {
144 // Advance directly to next bit set.
145 auto r = findFirstSet(len) - 1;
146 len >>= r;
147 powers += r;
149 crc = mult(crc, *powers, polynomial);
151 len >>= 1;
152 powers++;
155 return crc;
158 namespace detail {
160 uint32_t crc32_combine_sw(uint32_t crc1, uint32_t crc2, size_t crc2len) {
161 return crc2 ^
162 crc32_append_zeroes(gf_multiply_sw, crc1, crc2len, crc32_m, crc32_powers);
165 uint32_t crc32_combine_hw(uint32_t crc1, uint32_t crc2, size_t crc2len) {
166 return crc2 ^
167 crc32_append_zeroes(
168 gf_multiply_crc32_hw, crc1, crc2len, crc32_m, crc32_powers);
171 uint32_t crc32c_combine_sw(uint32_t crc1, uint32_t crc2, size_t crc2len) {
172 return crc2 ^
173 crc32_append_zeroes(
174 gf_multiply_sw, crc1, crc2len, crc32c_m, crc32c_powers);
177 uint32_t crc32c_combine_hw(uint32_t crc1, uint32_t crc2, size_t crc2len) {
178 return crc2 ^
179 crc32_append_zeroes(
180 gf_multiply_crc32c_hw, crc1, crc2len, crc32c_m, crc32c_powers);
183 } // namespace detail
185 } // namespace folly