Backed out 2 changesets (bug 903746) for causing non-unified build bustages on nsIPri...
[gecko.git] / third_party / highway / hwy / targets.h
blob9e7ccf9e65a295bf046ed8e050f2356b2d87ad03
1 // Copyright 2020 Google LLC
2 // SPDX-License-Identifier: Apache-2.0
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
16 #ifndef HIGHWAY_HWY_TARGETS_H_
17 #define HIGHWAY_HWY_TARGETS_H_
19 // Allows opting out of C++ standard library usage, which is not available in
20 // some Compiler Explorer environments.
21 #ifndef HWY_NO_LIBCXX
22 #include <vector>
23 #endif
25 // For SIMD module implementations and their callers. Defines which targets to
26 // generate and call.
28 #include "hwy/base.h"
29 #include "hwy/detect_targets.h"
30 #include "hwy/highway_export.h"
32 #if !HWY_ARCH_RVV && !defined(HWY_NO_LIBCXX)
33 #include <atomic>
34 #endif
36 namespace hwy {
38 // Returns bitfield of enabled targets that are supported on this CPU; there is
39 // always at least one such target, hence the return value is never 0. The
40 // targets returned may change after calling DisableTargets. This function is
41 // always defined, but the HWY_SUPPORTED_TARGETS wrapper may allow eliding
42 // calls to it if there is only a single target enabled.
43 HWY_DLLEXPORT int64_t SupportedTargets();
45 // Evaluates to a function call, or literal if there is a single target.
46 #if (HWY_TARGETS & (HWY_TARGETS - 1)) == 0
47 #define HWY_SUPPORTED_TARGETS HWY_TARGETS
48 #else
49 #define HWY_SUPPORTED_TARGETS hwy::SupportedTargets()
50 #endif
52 // Subsequent SupportedTargets will not return targets whose bit(s) are set in
53 // `disabled_targets`. Exception: if SupportedTargets would return 0, it will
54 // instead return HWY_STATIC_TARGET (there must always be one target to call).
56 // This function is useful for disabling targets known to be buggy, or if the
57 // best available target is undesirable (perhaps due to throttling or memory
58 // bandwidth limitations). Use SetSupportedTargetsForTest instead of this
59 // function for iteratively enabling specific targets for testing.
60 HWY_DLLEXPORT void DisableTargets(int64_t disabled_targets);
62 // Subsequent SupportedTargets will return the given set of targets, except
63 // those disabled via DisableTargets. Call with a mask of 0 to disable the mock
64 // and return to the normal SupportedTargets behavior. Used to run tests for
65 // all targets.
66 HWY_DLLEXPORT void SetSupportedTargetsForTest(int64_t targets);
68 #ifndef HWY_NO_LIBCXX
70 // Return the list of targets in HWY_TARGETS supported by the CPU as a list of
71 // individual HWY_* target macros such as HWY_SCALAR or HWY_NEON. This list
72 // is affected by the current SetSupportedTargetsForTest() mock if any.
73 HWY_INLINE std::vector<int64_t> SupportedAndGeneratedTargets() {
74 std::vector<int64_t> ret;
75 for (int64_t targets = SupportedTargets() & HWY_TARGETS; targets != 0;
76 targets = targets & (targets - 1)) {
77 int64_t current_target = targets & ~(targets - 1);
78 ret.push_back(current_target);
80 return ret;
83 #endif // HWY_NO_LIBCXX
85 static inline HWY_MAYBE_UNUSED const char* TargetName(int64_t target) {
86 switch (target) {
87 #if HWY_ARCH_X86
88 case HWY_SSE2:
89 return "SSE2";
90 case HWY_SSSE3:
91 return "SSSE3";
92 case HWY_SSE4:
93 return "SSE4";
94 case HWY_AVX2:
95 return "AVX2";
96 case HWY_AVX3:
97 return "AVX3";
98 case HWY_AVX3_DL:
99 return "AVX3_DL";
100 case HWY_AVX3_ZEN4:
101 return "AVX3_ZEN4";
102 #endif
104 #if HWY_ARCH_ARM
105 case HWY_SVE2_128:
106 return "SVE2_128";
107 case HWY_SVE_256:
108 return "SVE_256";
109 case HWY_SVE2:
110 return "SVE2";
111 case HWY_SVE:
112 return "SVE";
113 case HWY_NEON:
114 return "NEON";
115 case HWY_NEON_WITHOUT_AES:
116 return "NEON_WITHOUT_AES";
117 #endif
119 #if HWY_ARCH_PPC
120 case HWY_PPC8:
121 return "PPC8";
122 case HWY_PPC9:
123 return "PPC9";
124 case HWY_PPC10:
125 return "PPC10";
126 #endif
128 #if HWY_ARCH_WASM
129 case HWY_WASM:
130 return "WASM";
131 case HWY_WASM_EMU256:
132 return "WASM_EMU256";
133 #endif
135 #if HWY_ARCH_RVV
136 case HWY_RVV:
137 return "RVV";
138 #endif
140 case HWY_EMU128:
141 return "EMU128";
142 case HWY_SCALAR:
143 return "SCALAR";
145 default:
146 return "Unknown"; // must satisfy gtest IsValidParamName()
150 // The maximum number of dynamic targets on any architecture is defined by
151 // HWY_MAX_DYNAMIC_TARGETS and depends on the arch.
153 // For the ChosenTarget mask and index we use a different bit arrangement than
154 // in the HWY_TARGETS mask. Only the targets involved in the current
155 // architecture are used in this mask, and therefore only the least significant
156 // (HWY_MAX_DYNAMIC_TARGETS + 2) bits of the int64_t mask are used. The least
157 // significant bit is set when the mask is not initialized, the next
158 // HWY_MAX_DYNAMIC_TARGETS more significant bits are a range of bits from the
159 // HWY_TARGETS or SupportedTargets() mask for the given architecture shifted to
160 // that position and the next more significant bit is used for HWY_SCALAR (if
161 // HWY_COMPILE_ONLY_SCALAR is defined) or HWY_EMU128. Because of this we need to
162 // define equivalent values for HWY_TARGETS in this representation.
163 // This mask representation allows using ctz() on this mask to obtain a small
164 // number that's used as an index of the table for dynamic dispatch. In this
165 // way the first entry is used when the mask is uninitialized, the following
166 // HWY_MAX_DYNAMIC_TARGETS are for dynamic dispatch and the last one is for
167 // scalar.
169 // The HWY_SCALAR/HWY_EMU128 bit in the ChosenTarget mask format.
170 #define HWY_CHOSEN_TARGET_MASK_SCALAR (1LL << (HWY_MAX_DYNAMIC_TARGETS + 1))
172 // Converts from a HWY_TARGETS mask to a ChosenTarget mask format for the
173 // current architecture.
174 #define HWY_CHOSEN_TARGET_SHIFT(X) \
175 ((((X) >> (HWY_HIGHEST_TARGET_BIT + 1 - HWY_MAX_DYNAMIC_TARGETS)) & \
176 ((1LL << HWY_MAX_DYNAMIC_TARGETS) - 1)) \
177 << 1)
179 // The HWY_TARGETS mask in the ChosenTarget mask format.
180 #define HWY_CHOSEN_TARGET_MASK_TARGETS \
181 (HWY_CHOSEN_TARGET_SHIFT(HWY_TARGETS) | HWY_CHOSEN_TARGET_MASK_SCALAR | 1LL)
183 #if HWY_ARCH_X86
184 // Maximum number of dynamic targets; changing this value is an ABI-incompatible
185 // change.
186 #define HWY_MAX_DYNAMIC_TARGETS 15
187 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_X86
188 // These must match the order in which the HWY_TARGETS are defined
189 // starting from the least significant (HWY_HIGHEST_TARGET_BIT + 1 -
190 // HWY_MAX_DYNAMIC_TARGETS) bit. This list must contain exactly
191 // HWY_MAX_DYNAMIC_TARGETS elements and does not include SCALAR. The first entry
192 // corresponds to the best target. Don't include a "," at the end of the list.
193 #define HWY_CHOOSE_TARGET_LIST(func_name) \
194 nullptr, /* reserved */ \
195 nullptr, /* reserved */ \
196 nullptr, /* reserved */ \
197 nullptr, /* reserved */ \
198 nullptr, /* reserved */ \
199 nullptr, /* reserved */ \
200 HWY_CHOOSE_AVX3_ZEN4(func_name), /* AVX3_ZEN4 */ \
201 HWY_CHOOSE_AVX3_DL(func_name), /* AVX3_DL */ \
202 HWY_CHOOSE_AVX3(func_name), /* AVX3 */ \
203 HWY_CHOOSE_AVX2(func_name), /* AVX2 */ \
204 nullptr, /* AVX */ \
205 HWY_CHOOSE_SSE4(func_name), /* SSE4 */ \
206 HWY_CHOOSE_SSSE3(func_name), /* SSSE3 */ \
207 nullptr, /* reserved - SSE3? */ \
208 HWY_CHOOSE_SSE2(func_name) /* SSE2 */
210 #elif HWY_ARCH_ARM
211 // See HWY_ARCH_X86 above for details.
212 #define HWY_MAX_DYNAMIC_TARGETS 15
213 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_ARM
214 #define HWY_CHOOSE_TARGET_LIST(func_name) \
215 nullptr, /* reserved */ \
216 nullptr, /* reserved */ \
217 nullptr, /* reserved */ \
218 nullptr, /* reserved */ \
219 nullptr, /* reserved */ \
220 nullptr, /* reserved */ \
221 nullptr, /* reserved */ \
222 nullptr, /* reserved */ \
223 nullptr, /* reserved */ \
224 HWY_CHOOSE_SVE2_128(func_name), /* SVE2 128-bit */ \
225 HWY_CHOOSE_SVE_256(func_name), /* SVE 256-bit */ \
226 HWY_CHOOSE_SVE2(func_name), /* SVE2 */ \
227 HWY_CHOOSE_SVE(func_name), /* SVE */ \
228 HWY_CHOOSE_NEON(func_name), /* NEON */ \
229 HWY_CHOOSE_NEON_WITHOUT_AES(func_name) /* NEON without AES */
231 #elif HWY_ARCH_RVV
232 // See HWY_ARCH_X86 above for details.
233 #define HWY_MAX_DYNAMIC_TARGETS 9
234 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_RVV
235 #define HWY_CHOOSE_TARGET_LIST(func_name) \
236 nullptr, /* reserved */ \
237 nullptr, /* reserved */ \
238 nullptr, /* reserved */ \
239 nullptr, /* reserved */ \
240 nullptr, /* reserved */ \
241 nullptr, /* reserved */ \
242 nullptr, /* reserved */ \
243 HWY_CHOOSE_RVV(func_name), /* RVV */ \
244 nullptr /* reserved */
246 #elif HWY_ARCH_PPC
247 // See HWY_ARCH_X86 above for details.
248 #define HWY_MAX_DYNAMIC_TARGETS 9
249 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_PPC
250 #define HWY_CHOOSE_TARGET_LIST(func_name) \
251 nullptr, /* reserved */ \
252 nullptr, /* reserved */ \
253 nullptr, /* reserved */ \
254 nullptr, /* reserved */ \
255 HWY_CHOOSE_PPC10(func_name), /* PPC10 */ \
256 HWY_CHOOSE_PPC9(func_name), /* PPC9 */ \
257 HWY_CHOOSE_PPC8(func_name), /* PPC8 */ \
258 nullptr, /* reserved (VSX or AltiVec) */ \
259 nullptr /* reserved (VSX or AltiVec) */
261 #elif HWY_ARCH_WASM
262 // See HWY_ARCH_X86 above for details.
263 #define HWY_MAX_DYNAMIC_TARGETS 9
264 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_WASM
265 #define HWY_CHOOSE_TARGET_LIST(func_name) \
266 nullptr, /* reserved */ \
267 nullptr, /* reserved */ \
268 nullptr, /* reserved */ \
269 nullptr, /* reserved */ \
270 nullptr, /* reserved */ \
271 nullptr, /* reserved */ \
272 HWY_CHOOSE_WASM_EMU256(func_name), /* WASM_EMU256 */ \
273 HWY_CHOOSE_WASM(func_name), /* WASM */ \
274 nullptr /* reserved */
276 #else
277 // Unknown architecture, will use HWY_SCALAR without dynamic dispatch, though
278 // still creating single-entry tables in HWY_EXPORT to ensure portability.
279 #define HWY_MAX_DYNAMIC_TARGETS 1
280 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_SCALAR
281 #endif
283 // Bitfield of supported and enabled targets. The format differs from that of
284 // HWY_TARGETS; the lowest bit governs the first function pointer (which is
285 // special in that it calls FunctionCache, then Update, then dispatches to the
286 // actual implementation) in the tables created by HWY_EXPORT. Monostate (see
287 // GetChosenTarget), thread-safe except on RVV.
288 struct ChosenTarget {
289 public:
290 // Reset bits according to `targets` (typically the return value of
291 // SupportedTargets()). Postcondition: IsInitialized() == true.
292 void Update(int64_t targets) {
293 // These are `targets` shifted downwards, see above. Also include SCALAR
294 // (corresponds to the last entry in the function table) as fallback.
295 StoreMask(HWY_CHOSEN_TARGET_SHIFT(targets) | HWY_CHOSEN_TARGET_MASK_SCALAR);
298 // Reset to the uninitialized state, so that FunctionCache will call Update
299 // during the next HWY_DYNAMIC_DISPATCH, and IsInitialized returns false.
300 void DeInit() { StoreMask(1); }
302 // Whether Update was called. This indicates whether any HWY_DYNAMIC_DISPATCH
303 // function was called, which we check in tests.
304 bool IsInitialized() const { return LoadMask() != 1; }
306 // Return the index in the dynamic dispatch table to be used by the current
307 // CPU. Note that this method must be in the header file so it uses the value
308 // of HWY_CHOSEN_TARGET_MASK_TARGETS defined in the translation unit that
309 // calls it, which may be different from others. This means we only enable
310 // those targets that were actually compiled in this module.
311 size_t HWY_INLINE GetIndex() const {
312 return hwy::Num0BitsBelowLS1Bit_Nonzero64(
313 static_cast<uint64_t>(LoadMask() & HWY_CHOSEN_TARGET_MASK_TARGETS));
316 private:
317 // TODO(janwas): remove RVV once <atomic> is available
318 #if HWY_ARCH_RVV || defined(HWY_NO_LIBCXX)
319 int64_t LoadMask() const { return mask_; }
320 void StoreMask(int64_t mask) { mask_ = mask; }
322 int64_t mask_{1}; // Initialized to 1 so GetIndex() returns 0.
323 #else
324 int64_t LoadMask() const { return mask_.load(); }
325 void StoreMask(int64_t mask) { mask_.store(mask); }
327 std::atomic<int64_t> mask_{1}; // Initialized to 1 so GetIndex() returns 0.
328 #endif // HWY_ARCH_RVV
331 // For internal use (e.g. by FunctionCache and DisableTargets).
332 HWY_DLLEXPORT ChosenTarget& GetChosenTarget();
334 } // namespace hwy
336 #endif // HIGHWAY_HWY_TARGETS_H_