2 * Copyright (c) Meta Platforms, Inc. and affiliates.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
20 #include <folly/Bits.h>
21 #include <folly/hash/detail/ChecksumDetail.h>
25 // Standard galois-field multiply. The only modification is that a,
26 // b, m, and p are all bit-reflected.
28 // https://en.wikipedia.org/wiki/Finite_field_arithmetic
29 static constexpr uint32_t gf_multiply_sw_1(
30 size_t i
, uint32_t p
, uint32_t a
, uint32_t b
, uint32_t m
) {
32 return i
== 32 ? p
: gf_multiply_sw_1(
34 /* p = */ p
^ (-((b
>> 31) & 1) & a
),
35 /* a = */ (a
>> 1) ^ (-(a
& 1) & m
),
40 static constexpr uint32_t gf_multiply_sw(uint32_t a
, uint32_t b
, uint32_t m
) {
41 return gf_multiply_sw_1(/* i = */ 0, /* p = */ 0, a
, b
, m
);
44 static constexpr uint32_t gf_square_sw(uint32_t a
, uint32_t m
) {
45 return gf_multiply_sw(a
, a
, m
);
50 template <size_t i
, uint32_t m
>
51 struct gf_powers_memo
{
52 static constexpr uint32_t value
=
53 gf_square_sw(gf_powers_memo
<i
- 1, m
>::value
, m
);
56 struct gf_powers_memo
<0, m
> {
57 static constexpr uint32_t value
= m
;
61 struct gf_powers_make
{
62 template <size_t... i
>
63 constexpr auto operator()(std::index_sequence
<i
...>) const {
64 return std::array
<uint32_t, sizeof...(i
)>{{gf_powers_memo
<i
, m
>::value
...}};
70 #if FOLLY_SSE_PREREQ(4, 2)
72 // Reduction taken from
73 // https://www.nicst.de/crc.pdf
75 // This is an intrinsics-based implementation of listing 3.
76 static uint32_t gf_multiply_crc32c_hw(uint64_t crc1
, uint64_t crc2
, uint32_t) {
77 const auto crc1_xmm
= _mm_set_epi64x(0, crc1
);
78 const auto crc2_xmm
= _mm_set_epi64x(0, crc2
);
79 const auto count
= _mm_set_epi64x(0, 1);
80 const auto res0
= _mm_clmulepi64_si128(crc2_xmm
, crc1_xmm
, 0x00);
81 const auto res1
= _mm_sll_epi64(res0
, count
);
83 // Use hardware crc32c to do reduction from 64 -> 32 bytes
84 const auto res2
= _mm_cvtsi128_si64(res1
);
85 const auto res3
= _mm_crc32_u32(0, res2
);
86 const auto res4
= _mm_extract_epi32(res1
, 1);
90 static uint32_t gf_multiply_crc32_hw(uint64_t crc1
, uint64_t crc2
, uint32_t) {
91 const auto crc1_xmm
= _mm_set_epi64x(0, crc1
);
92 const auto crc2_xmm
= _mm_set_epi64x(0, crc2
);
93 const auto count
= _mm_set_epi64x(0, 1);
94 const auto res0
= _mm_clmulepi64_si128(crc2_xmm
, crc1_xmm
, 0x00);
95 const auto res1
= _mm_sll_epi64(res0
, count
);
97 // Do barrett reduction of 64 -> 32 bytes
98 const auto mask32
= _mm_set_epi32(0, 0, 0, 0xFFFFFFFF);
99 const auto barrett_reduction_constants
=
100 _mm_set_epi32(0x1, 0xDB710641, 0x1, 0xF7011641);
101 const auto res2
= _mm_clmulepi64_si128(
102 _mm_and_si128(res1
, mask32
), barrett_reduction_constants
, 0x00);
103 const auto res3
= _mm_clmulepi64_si128(
104 _mm_and_si128(res2
, mask32
), barrett_reduction_constants
, 0x10);
105 return _mm_cvtsi128_si32(_mm_srli_si128(_mm_xor_si128(res3
, res1
), 4));
110 static uint32_t gf_multiply_crc32c_hw(uint64_t, uint64_t, uint32_t) {
113 static uint32_t gf_multiply_crc32_hw(uint64_t, uint64_t, uint32_t) {
119 static constexpr uint32_t crc32c_m
= 0x82f63b78;
120 static constexpr uint32_t crc32_m
= 0xedb88320;
123 * Pre-calculated powers tables for crc32c and crc32.
125 static constexpr std::array
<uint32_t, 62> const crc32c_powers
=
126 gf_powers_make
<crc32c_m
>{}(std::make_index_sequence
<62>{});
127 static constexpr std::array
<uint32_t, 62> const crc32_powers
=
128 gf_powers_make
<crc32_m
>{}(std::make_index_sequence
<62>{});
130 template <typename F
>
131 static uint32_t crc32_append_zeroes(
136 std::array
<uint32_t, 62> const& powers_array
) {
137 auto powers
= powers_array
.data();
139 // Append by multiplying by consecutive powers of two of the zeroes
144 // Advance directly to next bit set.
145 auto r
= findFirstSet(len
) - 1;
149 crc
= mult(crc
, *powers
, polynomial
);
160 uint32_t crc32_combine_sw(uint32_t crc1
, uint32_t crc2
, size_t crc2len
) {
162 crc32_append_zeroes(gf_multiply_sw
, crc1
, crc2len
, crc32_m
, crc32_powers
);
165 uint32_t crc32_combine_hw(uint32_t crc1
, uint32_t crc2
, size_t crc2len
) {
168 gf_multiply_crc32_hw
, crc1
, crc2len
, crc32_m
, crc32_powers
);
171 uint32_t crc32c_combine_sw(uint32_t crc1
, uint32_t crc2
, size_t crc2len
) {
174 gf_multiply_sw
, crc1
, crc2len
, crc32c_m
, crc32c_powers
);
177 uint32_t crc32c_combine_hw(uint32_t crc1
, uint32_t crc2
, size_t crc2len
) {
180 gf_multiply_crc32c_hw
, crc1
, crc2len
, crc32c_m
, crc32c_powers
);
183 } // namespace detail