Bug 1861709 replace AudioCallbackDriver::ThreadRunning() assertions that mean to...
[gecko.git] / third_party / highway / hwy / cache_control.h
blob6e7665dd29a2256b0af492625ffe252abe875a69
1 // Copyright 2020 Google LLC
2 // SPDX-License-Identifier: Apache-2.0
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
16 #ifndef HIGHWAY_HWY_CACHE_CONTROL_H_
17 #define HIGHWAY_HWY_CACHE_CONTROL_H_
19 #include "hwy/base.h"
// The cache-control intrinsics below need SSE2. They are also switched off for
// 32-bit x86 Clang builds, where they fail to compile (Clang 7, see
// https://github.com/gperftools/gperftools/issues/946).
// The #undef/#define pair replaces any earlier definition with an empty one.
#if (HWY_COMPILER_CLANG && HWY_ARCH_X86_32) || !defined(__SSE2__)
#undef HWY_DISABLE_CACHE_CONTROL
#define HWY_DISABLE_CACHE_CONTROL
#endif
28 // intrin.h is sufficient on MSVC and already included by base.h.
29 #if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL) && !HWY_COMPILER_MSVC
30 #include <emmintrin.h> // SSE2
31 #include <xmmintrin.h> // _mm_prefetch
32 #endif
34 namespace hwy {
36 // Even if N*sizeof(T) is smaller, Stream may write a multiple of this size.
37 #define HWY_STREAM_MULTIPLE 16
// Attribute applied to every function below. It expands to nothing when the
// target is not x86, when cache control is disabled, or when compiling with
// MSVC; otherwise it requests the SSE2 target for the function.
#if !HWY_ARCH_X86 || defined(HWY_DISABLE_CACHE_CONTROL) || HWY_COMPILER_MSVC
#define HWY_ATTR_CACHE
#else
#define HWY_ATTR_CACHE __attribute__((target("sse2")))
#endif
46 // Windows.h #defines this, which causes infinite recursion. Temporarily
47 // undefine to avoid conflict with our function.
48 // TODO(janwas): remove when this function is removed.
49 #pragma push_macro("LoadFence")
50 #undef LoadFence
52 // Delays subsequent loads until prior loads are visible. Beware of potentially
53 // differing behavior across architectures and vendors: on Intel but not
54 // AMD CPUs, also serves as a full fence (waits for all prior instructions to
55 // complete).
56 HWY_INLINE HWY_ATTR_CACHE void LoadFence() {
57 #if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL)
58 _mm_lfence();
59 #endif
62 // TODO(janwas): remove when this function is removed. (See above.)
63 #pragma pop_macro("LoadFence")
65 // Ensures values written by previous `Stream` calls are visible on the current
66 // core. This is NOT sufficient for synchronizing across cores; when `Stream`
67 // outputs are to be consumed by other core(s), the producer must publish
68 // availability (e.g. via mutex or atomic_flag) after `FlushStream`.
69 HWY_INLINE HWY_ATTR_CACHE void FlushStream() {
70 #if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL)
71 _mm_sfence();
72 #endif
75 // Optionally begins loading the cache line containing "p" to reduce latency of
76 // subsequent actual loads.
77 template <typename T>
78 HWY_INLINE HWY_ATTR_CACHE void Prefetch(const T* p) {
79 #if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL)
80 _mm_prefetch(reinterpret_cast<const char*>(p), _MM_HINT_T0);
81 #elif HWY_COMPILER_GCC // includes clang
82 // Hint=0 (NTA) behavior differs, but skipping outer caches is probably not
83 // desirable, so use the default 3 (keep in caches).
84 __builtin_prefetch(p, /*write=*/0, /*hint=*/3);
85 #else
86 (void)p;
87 #endif
90 // Invalidates and flushes the cache line containing "p", if possible.
91 HWY_INLINE HWY_ATTR_CACHE void FlushCacheline(const void* p) {
92 #if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL)
93 _mm_clflush(p);
94 #else
95 (void)p;
96 #endif
99 // When called inside a spin-loop, may reduce power consumption.
100 HWY_INLINE HWY_ATTR_CACHE void Pause() {
101 #if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL)
102 _mm_pause();
103 #endif
106 } // namespace hwy
108 #endif // HIGHWAY_HWY_CACHE_CONTROL_H_