third-party/folly/src/folly/hash/detail/Crc32CombineDetail.cpp

   1 /*
   2  * Copyright (c) Meta Platforms, Inc. and affiliates.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *     http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include <array>
  18 #include <utility>
  19
  20 #include <folly/Bits.h>
  21 #include <folly/hash/detail/ChecksumDetail.h>
  22
  23 namespace folly {
  24
  25 // Standard galois-field multiply.  The only modification is that a,
  26 // b, m, and p are all bit-reflected.
  27 //
  28 // https://en.wikipedia.org/wiki/Finite_field_arithmetic
  29 static constexpr uint32_t gf_multiply_sw_1(
  30     size_t i, uint32_t p, uint32_t a, uint32_t b, uint32_t m) {
  31   // clang-format off
  32   return i == 32 ? p : gf_multiply_sw_1(
  33       /* i = */ i + 1,
  34       /* p = */ p ^ (-((b >> 31) & 1) & a),
  35       /* a = */ (a >> 1) ^ (-(a & 1) & m),
  36       /* b = */ b << 1,
  37       /* m = */ m);
  38   // clang-format on
  39 }
  40 static constexpr uint32_t gf_multiply_sw(uint32_t a, uint32_t b, uint32_t m) {
  41   return gf_multiply_sw_1(/* i = */ 0, /* p = */ 0, a, b, m);
  42 }
  43
  44 static constexpr uint32_t gf_square_sw(uint32_t a, uint32_t m) {
  45   return gf_multiply_sw(a, a, m);
  46 }
  47
  48 namespace {
  49
  50 template <size_t i, uint32_t m>
  51 struct gf_powers_memo {
  52   static constexpr uint32_t value =
  53       gf_square_sw(gf_powers_memo<i - 1, m>::value, m);
  54 };
  55 template <uint32_t m>
  56 struct gf_powers_memo<0, m> {
  57   static constexpr uint32_t value = m;
  58 };
  59
  60 template <uint32_t m>
  61 struct gf_powers_make {
  62   template <size_t... i>
  63   constexpr auto operator()(std::index_sequence<i...>) const {
  64     return std::array<uint32_t, sizeof...(i)>{{gf_powers_memo<i, m>::value...}};
  65   }
  66 };
  67
  68 } // namespace
  69
  70 #if FOLLY_SSE_PREREQ(4, 2)
  71
  72 // Reduction taken from
  73 // https://www.nicst.de/crc.pdf
  74 //
  75 // This is an intrinsics-based implementation of listing 3.
  76 static uint32_t gf_multiply_crc32c_hw(uint64_t crc1, uint64_t crc2, uint32_t) {
  77   const auto crc1_xmm = _mm_set_epi64x(0, crc1);
  78   const auto crc2_xmm = _mm_set_epi64x(0, crc2);
  79   const auto count = _mm_set_epi64x(0, 1);
  80   const auto res0 = _mm_clmulepi64_si128(crc2_xmm, crc1_xmm, 0x00);
  81   const auto res1 = _mm_sll_epi64(res0, count);
  82
  83   // Use hardware crc32c to do reduction from 64 -> 32 bytes
  84   const auto res2 = _mm_cvtsi128_si64(res1);
  85   const auto res3 = _mm_crc32_u32(0, res2);
  86   const auto res4 = _mm_extract_epi32(res1, 1);
  87   return res3 ^ res4;
  88 }
  89
  90 static uint32_t gf_multiply_crc32_hw(uint64_t crc1, uint64_t crc2, uint32_t) {
  91   const auto crc1_xmm = _mm_set_epi64x(0, crc1);
  92   const auto crc2_xmm = _mm_set_epi64x(0, crc2);
  93   const auto count = _mm_set_epi64x(0, 1);
  94   const auto res0 = _mm_clmulepi64_si128(crc2_xmm, crc1_xmm, 0x00);
  95   const auto res1 = _mm_sll_epi64(res0, count);
  96
  97   // Do barrett reduction of 64 -> 32 bytes
  98   const auto mask32 = _mm_set_epi32(0, 0, 0, 0xFFFFFFFF);
  99   const auto barrett_reduction_constants =
 100       _mm_set_epi32(0x1, 0xDB710641, 0x1, 0xF7011641);
 101   const auto res2 = _mm_clmulepi64_si128(
 102       _mm_and_si128(res1, mask32), barrett_reduction_constants, 0x00);
 103   const auto res3 = _mm_clmulepi64_si128(
 104       _mm_and_si128(res2, mask32), barrett_reduction_constants, 0x10);
 105   return _mm_cvtsi128_si32(_mm_srli_si128(_mm_xor_si128(res3, res1), 4));
 106 }
 107
 108 #else
 109
 110 static uint32_t gf_multiply_crc32c_hw(uint64_t, uint64_t, uint32_t) {
 111   return 0;
 112 }
 113 static uint32_t gf_multiply_crc32_hw(uint64_t, uint64_t, uint32_t) {
 114   return 0;
 115 }
 116
 117 #endif
 118
 119 static constexpr uint32_t crc32c_m = 0x82f63b78;
 120 static constexpr uint32_t crc32_m = 0xedb88320;
 121
 122 /*
 123  * Pre-calculated powers tables for crc32c and crc32.
 124  */
 125 static constexpr std::array<uint32_t, 62> const crc32c_powers =
 126     gf_powers_make<crc32c_m>{}(std::make_index_sequence<62>{});
 127 static constexpr std::array<uint32_t, 62> const crc32_powers =
 128     gf_powers_make<crc32_m>{}(std::make_index_sequence<62>{});
 129
 130 template <typename F>
 131 static uint32_t crc32_append_zeroes(
 132     F mult,
 133     uint32_t crc,
 134     size_t len,
 135     uint32_t polynomial,
 136     std::array<uint32_t, 62> const& powers_array) {
 137   auto powers = powers_array.data();
 138
 139   // Append by multiplying by consecutive powers of two of the zeroes
 140   // array
 141   len >>= 2;
 142
 143   while (len) {
 144     // Advance directly to next bit set.
 145     auto r = findFirstSet(len) - 1;
 146     len >>= r;
 147     powers += r;
 148
 149     crc = mult(crc, *powers, polynomial);
 150
 151     len >>= 1;
 152     powers++;
 153   }
 154
 155   return crc;
 156 }
 157
 158 namespace detail {
 159
 160 uint32_t crc32_combine_sw(uint32_t crc1, uint32_t crc2, size_t crc2len) {
 161   return crc2 ^
 162       crc32_append_zeroes(gf_multiply_sw, crc1, crc2len, crc32_m, crc32_powers);
 163 }
 164
 165 uint32_t crc32_combine_hw(uint32_t crc1, uint32_t crc2, size_t crc2len) {
 166   return crc2 ^
 167       crc32_append_zeroes(
 168              gf_multiply_crc32_hw, crc1, crc2len, crc32_m, crc32_powers);
 169 }
 170
 171 uint32_t crc32c_combine_sw(uint32_t crc1, uint32_t crc2, size_t crc2len) {
 172   return crc2 ^
 173       crc32_append_zeroes(
 174              gf_multiply_sw, crc1, crc2len, crc32c_m, crc32c_powers);
 175 }
 176
 177 uint32_t crc32c_combine_hw(uint32_t crc1, uint32_t crc2, size_t crc2len) {
 178   return crc2 ^
 179       crc32_append_zeroes(
 180              gf_multiply_crc32c_hw, crc1, crc2len, crc32c_m, crc32c_powers);
 181 }
 182
 183 } // namespace detail
 184
 185 } // namespace folly