Squashed 'src/leveldb/' changes from a31c8aa40..196962ff0
[bitcoinplatinum.git] / port / port_posix_sse.cc
blob1e519ba0b64befe92b0a6ca70f290f699151baab
1 // Copyright 2016 The LevelDB Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. See the AUTHORS file for names of contributors.
4 //
5 // A hardware-accelerated implementation of crc32c that uses the x86 SSE 4.2
6 // crc32 instructions, handling up to eight bytes at a time.
7 //
8 // In a separate source file to allow this accelerated CRC32C function to be
9 // compiled with the appropriate compiler flags to enable x86 SSE 4.2
10 // instructions.
12 #include <stdint.h>
13 #include <string.h>
14 #include "port/port.h"
16 #if defined(LEVELDB_PLATFORM_POSIX_SSE)
18 #if defined(_MSC_VER)
19 #include <intrin.h>
20 #elif defined(__GNUC__) && defined(__SSE4_2__)
21 #include <nmmintrin.h>
22 #include <cpuid.h>
23 #endif
25 #endif // defined(LEVELDB_PLATFORM_POSIX_SSE)
27 namespace leveldb {
28 namespace port {
30 #if defined(LEVELDB_PLATFORM_POSIX_SSE)
// Reads the four bytes starting at |p| and returns them as a 32-bit word in
// the host's native byte order. The bytes are copied with memcpy, so |p| does
// not have to be aligned.
static inline uint32_t LE_LOAD32(const uint8_t *p) {
  // This path is only built for x86 (it exists for SSE), where the native
  // byte order is little-endian, so no byte swapping is needed.
  uint32_t value;
  memcpy(&value, p, sizeof value);
  return value;
}
40 #if defined(_M_X64) || defined(__x86_64__) // LE_LOAD64 is only used on x64.
// Reads the eight bytes starting at |p| and returns them as a 64-bit word in
// the host's native byte order (little-endian on x64, the only place this is
// compiled). The bytes are copied with memcpy, so |p| does not have to be
// aligned.
static inline uint64_t LE_LOAD64(const uint8_t *p) {
  uint64_t value;
  memcpy(&value, p, sizeof value);
  return value;
}
49 #endif // defined(_M_X64) || defined(__x86_64__)
// Reports whether the CPU this code is running on advertises SSE 4.2, by
// querying CPUID leaf 1 and testing bit 20 of ECX.
//
// Returns false when the query cannot be performed: either the compiler is
// neither MSVC nor GCC-compatible, or CPUID leaf 1 is reported as
// unsupported.
static inline bool HaveSSE42() {
#if defined(_MSC_VER)
  int cpu_info[4];
  __cpuid(cpu_info, 1);
  // cpu_info[2] receives ECX.
  return (cpu_info[2] & (1 << 20)) != 0;
#elif defined(__GNUC__)
  unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
  // __get_cpuid returns 0 without writing its outputs when the requested
  // leaf is not supported; treat that as "no SSE 4.2" rather than reading
  // an indeterminate |ecx|.
  if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
    return false;
  }
  return (ecx & (1u << 20)) != 0;
#else
  return false;
#endif
}
65 #endif // defined(LEVELDB_PLATFORM_POSIX_SSE)
// Computes a CRC32C checksum over buf[0, size) using the x86 SSE 4.2 crc32
// instructions. |crc| is the checksum to extend: the running state starts as
// crc ^ 0xffffffff and the result is the final state ^ 0xffffffff, so the
// return value of one call can be passed as |crc| to the next.
//
// Returns 0 when no accelerated result is available: either this file was
// built without LEVELDB_PLATFORM_POSIX_SSE, or the CPU does not report
// SSE 4.2 at run time.
// NOTE(review): callers presumably treat 0 as "fall back to the software
// implementation" -- confirm at the call site.
//
// For further improvements see Intel publication at:
// http://download.intel.com/design/intarch/papers/323405.pdf
uint32_t AcceleratedCRC32C(uint32_t crc, const char* buf, size_t size) {
#if !defined(LEVELDB_PLATFORM_POSIX_SSE)
  return 0;
#else
  // Probe the CPU once; the answer cannot change while the process runs.
  static bool have = HaveSSE42();
  if (!have) {
    return 0;
  }

  const uint8_t *p = reinterpret_cast<const uint8_t *>(buf);
  const uint8_t *e = p + size;
  uint32_t l = crc ^ 0xffffffffu;

  if (size > 16) {
    // Consume single bytes until |p| sits on an 8-byte boundary so that the
    // wide loads below are aligned. At most 7 bytes are consumed, which the
    // size > 16 guard guarantees are available.
    const uintptr_t misalignment = reinterpret_cast<uintptr_t>(p) % 8;
    for (uintptr_t i = (8 - misalignment) % 8; i; --i) {
      l = _mm_crc32_u8(l, *p++);
    }

    // _mm_crc32_u64 is only available on x64.
#if defined(_M_X64) || defined(__x86_64__)
    // Process 8 bytes at a time. The intrinsic returns a 64-bit value whose
    // meaningful part is the low 32 bits, hence the explicit narrowing.
    while ((e - p) >= 8) {
      l = static_cast<uint32_t>(_mm_crc32_u64(l, LE_LOAD64(p)));
      p += 8;
    }
    // At most one 4-byte word can remain after the 8-byte loop.
    if ((e - p) >= 4) {
      l = _mm_crc32_u32(l, LE_LOAD32(p));
      p += 4;
    }
#else  // !(defined(_M_X64) || defined(__x86_64__))
    // Process 4 bytes at a time.
    while ((e - p) >= 4) {
      l = _mm_crc32_u32(l, LE_LOAD32(p));
      p += 4;
    }
#endif  // defined(_M_X64) || defined(__x86_64__)
  }

  // Process the last few bytes (or the whole input when size <= 16).
  while (p != e) {
    l = _mm_crc32_u8(l, *p++);
  }
  return l ^ 0xffffffffu;
#endif  // defined(LEVELDB_PLATFORM_POSIX_SSE)
}
128 } // namespace port
129 } // namespace leveldb