Backed out 2 changesets (bug 903746) for causing non-unified build bustages on nsIPri...
[gecko.git] / third_party / highway / hwy / targets.cc
blob8886ea48ed4ebccfc5bbe88e05d2d0178082b012
1 // Copyright 2019 Google LLC
2 // SPDX-License-Identifier: Apache-2.0
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
16 #include "hwy/targets.h"
18 #ifndef __STDC_FORMAT_MACROS
19 #define __STDC_FORMAT_MACROS // before inttypes.h
20 #endif
21 #include <inttypes.h> // IWYU pragma: keep (PRIx64)
22 #include <stdarg.h>
23 #include <stdio.h>
24 #include <stdlib.h> // abort / exit
26 #include "hwy/highway.h"
27 #include "hwy/per_target.h" // VectorBytes
29 #if HWY_IS_ASAN || HWY_IS_MSAN || HWY_IS_TSAN
30 #include "sanitizer/common_interface_defs.h" // __sanitizer_print_stack_trace
31 #endif
33 #if HWY_ARCH_X86
34 #include <xmmintrin.h>
35 #if HWY_COMPILER_MSVC
36 #include <intrin.h>
37 #else // !HWY_COMPILER_MSVC
38 #include <cpuid.h>
39 #endif // HWY_COMPILER_MSVC
41 #elif (HWY_ARCH_ARM || HWY_ARCH_PPC) && HWY_OS_LINUX
42 // sys/auxv.h does not always include asm/hwcap.h, or define HWCAP*, hence we
43 // still include this directly. See #1199.
44 #ifndef TOOLCHAIN_MISS_ASM_HWCAP_H
45 #include <asm/hwcap.h>
46 #endif
47 #ifndef TOOLCHAIN_MISS_SYS_AUXV_H
48 #include <sys/auxv.h>
49 #endif
51 #endif // HWY_ARCH_*
53 namespace hwy {
54 namespace {
56 // When running tests, this value can be set to the mocked supported targets
57 // mask. Only written to from a single thread before the test starts.
58 int64_t supported_targets_for_test_ = 0;
// Mask of targets that remain enabled at runtime: DisableTargets stores the
// complement of the disabled targets here, and SupportedTargets ANDs with it.
61 int64_t supported_mask_ = LimitsMax<int64_t>();
63 #if HWY_ARCH_X86 && HWY_HAVE_RUNTIME_DISPATCH
64 namespace x86 {
66 // Calls CPUID instruction with eax=level and ecx=count and returns the result
67 // in abcd array where abcd = {eax, ebx, ecx, edx} (hence the name abcd).
68 HWY_INLINE void Cpuid(const uint32_t level, const uint32_t count,
69 uint32_t* HWY_RESTRICT abcd) {
70 #if HWY_COMPILER_MSVC
71 int regs[4];
72 __cpuidex(regs, level, count);
73 for (int i = 0; i < 4; ++i) {
74 abcd[i] = regs[i];
76 #else // HWY_COMPILER_MSVC
77 uint32_t a;
78 uint32_t b;
79 uint32_t c;
80 uint32_t d;
81 __cpuid_count(level, count, a, b, c, d);
82 abcd[0] = a;
83 abcd[1] = b;
84 abcd[2] = c;
85 abcd[3] = d;
86 #endif // HWY_COMPILER_MSVC
89 HWY_INLINE bool IsBitSet(const uint32_t reg, const int index) {
90 return (reg & (1U << index)) != 0;
// Reads extended control register 0 and returns its lower 32 bits.
// Callers must first verify that the CPU reports "OSXSAVE" (see below).
uint32_t ReadXCR0() {
#if HWY_COMPILER_MSVC
  return static_cast<uint32_t>(_xgetbv(0));
#else   // !HWY_COMPILER_MSVC
  uint32_t lo;
  uint32_t hi;  // upper half of the result; read but not returned
  const uint32_t xcr_index = 0;
  // Emitted as raw opcode bytes (presumably the XGETBV encoding, matching the
  // MSVC path): register index in ecx, result in edx:eax.
  asm volatile(".byte 0x0F, 0x01, 0xD0"
               : "=a"(lo), "=d"(hi)
               : "c"(xcr_index));
  return lo;
#endif  // HWY_COMPILER_MSVC
}
108 bool IsAMD() {
109 uint32_t abcd[4];
110 Cpuid(0, 0, abcd);
111 const uint32_t max_level = abcd[0];
112 return max_level >= 1 && abcd[1] == 0x68747541 && abcd[2] == 0x444d4163 &&
113 abcd[3] == 0x69746e65;
// Bit positions (within a uint64_t) that record which instruction set
// extensions CPUID reported. The numeric values carry no meaning of their own;
// an enum is used so that each feature automatically gets a distinct index.
enum class FeatureIndex : uint32_t {
  // 128-bit SSE family, up to SSSE3.
  kSSE = 0,
  kSSE2,
  kSSE3,
  kSSSE3,

  // SSE4 plus carry-less multiply and AES.
  kSSE41,
  kSSE42,
  kCLMUL,
  kAES,

  // AVX/AVX2 and the features grouped with them.
  kAVX,
  kAVX2,
  kF16C,
  kFMA,
  kLZCNT,
  kBMI,
  kBMI2,

  // AVX-512 foundation and its core extensions.
  kAVX512F,
  kAVX512VL,
  kAVX512CD,
  kAVX512DQ,
  kAVX512BW,

  // Additional AVX-512 era extensions.
  kVNNI,
  kVPCLMULQDQ,
  kVBMI,
  kVBMI2,
  kVAES,
  kPOPCNTDQ,
  kBITALG,
  kGFNI,

  kSentinel  // number of features; must remain last
};
// Every index must fit in the 64-bit mask built by Bit().
static_assert(static_cast<size_t>(FeatureIndex::kSentinel) < 64,
              "Too many bits for u64");
157 HWY_INLINE constexpr uint64_t Bit(FeatureIndex index) {
158 return 1ull << static_cast<size_t>(index);
161 // Returns bit array of FeatureIndex from CPUID feature flags.
162 uint64_t FlagsFromCPUID() {
163 uint64_t flags = 0; // return value
164 uint32_t abcd[4];
165 Cpuid(0, 0, abcd);
166 const uint32_t max_level = abcd[0];
168 // Standard feature flags
169 Cpuid(1, 0, abcd);
170 flags |= IsBitSet(abcd[3], 25) ? Bit(FeatureIndex::kSSE) : 0;
171 flags |= IsBitSet(abcd[3], 26) ? Bit(FeatureIndex::kSSE2) : 0;
172 flags |= IsBitSet(abcd[2], 0) ? Bit(FeatureIndex::kSSE3) : 0;
173 flags |= IsBitSet(abcd[2], 1) ? Bit(FeatureIndex::kCLMUL) : 0;
174 flags |= IsBitSet(abcd[2], 9) ? Bit(FeatureIndex::kSSSE3) : 0;
175 flags |= IsBitSet(abcd[2], 12) ? Bit(FeatureIndex::kFMA) : 0;
176 flags |= IsBitSet(abcd[2], 19) ? Bit(FeatureIndex::kSSE41) : 0;
177 flags |= IsBitSet(abcd[2], 20) ? Bit(FeatureIndex::kSSE42) : 0;
178 flags |= IsBitSet(abcd[2], 25) ? Bit(FeatureIndex::kAES) : 0;
179 flags |= IsBitSet(abcd[2], 28) ? Bit(FeatureIndex::kAVX) : 0;
180 flags |= IsBitSet(abcd[2], 29) ? Bit(FeatureIndex::kF16C) : 0;
182 // Extended feature flags
183 Cpuid(0x80000001U, 0, abcd);
184 flags |= IsBitSet(abcd[2], 5) ? Bit(FeatureIndex::kLZCNT) : 0;
186 // Extended features
187 if (max_level >= 7) {
188 Cpuid(7, 0, abcd);
189 flags |= IsBitSet(abcd[1], 3) ? Bit(FeatureIndex::kBMI) : 0;
190 flags |= IsBitSet(abcd[1], 5) ? Bit(FeatureIndex::kAVX2) : 0;
191 flags |= IsBitSet(abcd[1], 8) ? Bit(FeatureIndex::kBMI2) : 0;
193 flags |= IsBitSet(abcd[1], 16) ? Bit(FeatureIndex::kAVX512F) : 0;
194 flags |= IsBitSet(abcd[1], 17) ? Bit(FeatureIndex::kAVX512DQ) : 0;
195 flags |= IsBitSet(abcd[1], 28) ? Bit(FeatureIndex::kAVX512CD) : 0;
196 flags |= IsBitSet(abcd[1], 30) ? Bit(FeatureIndex::kAVX512BW) : 0;
197 flags |= IsBitSet(abcd[1], 31) ? Bit(FeatureIndex::kAVX512VL) : 0;
199 flags |= IsBitSet(abcd[2], 1) ? Bit(FeatureIndex::kVBMI) : 0;
200 flags |= IsBitSet(abcd[2], 6) ? Bit(FeatureIndex::kVBMI2) : 0;
201 flags |= IsBitSet(abcd[2], 8) ? Bit(FeatureIndex::kGFNI) : 0;
202 flags |= IsBitSet(abcd[2], 9) ? Bit(FeatureIndex::kVAES) : 0;
203 flags |= IsBitSet(abcd[2], 10) ? Bit(FeatureIndex::kVPCLMULQDQ) : 0;
204 flags |= IsBitSet(abcd[2], 11) ? Bit(FeatureIndex::kVNNI) : 0;
205 flags |= IsBitSet(abcd[2], 12) ? Bit(FeatureIndex::kBITALG) : 0;
206 flags |= IsBitSet(abcd[2], 14) ? Bit(FeatureIndex::kPOPCNTDQ) : 0;
209 return flags;
212 // Each Highway target requires a 'group' of multiple features/flags.
213 constexpr uint64_t kGroupSSE2 =
214 Bit(FeatureIndex::kSSE) | Bit(FeatureIndex::kSSE2);
216 constexpr uint64_t kGroupSSSE3 =
217 Bit(FeatureIndex::kSSE3) | Bit(FeatureIndex::kSSSE3) | kGroupSSE2;
219 constexpr uint64_t kGroupSSE4 =
220 Bit(FeatureIndex::kSSE41) | Bit(FeatureIndex::kSSE42) |
221 Bit(FeatureIndex::kCLMUL) | Bit(FeatureIndex::kAES) | kGroupSSSE3;
223 // We normally assume BMI/BMI2/FMA are available if AVX2 is. This allows us to
224 // use BZHI and (compiler-generated) MULX. However, VirtualBox lacks them
225 // [https://www.virtualbox.org/ticket/15471]. Thus we provide the option of
226 // avoiding using and requiring these so AVX2 can still be used.
227 #ifdef HWY_DISABLE_BMI2_FMA
228 constexpr uint64_t kGroupBMI2_FMA = 0;
229 #else
230 constexpr uint64_t kGroupBMI2_FMA = Bit(FeatureIndex::kBMI) |
231 Bit(FeatureIndex::kBMI2) |
232 Bit(FeatureIndex::kFMA);
233 #endif
235 #ifdef HWY_DISABLE_F16C
236 constexpr uint64_t kGroupF16C = 0;
237 #else
238 constexpr uint64_t kGroupF16C = Bit(FeatureIndex::kF16C);
239 #endif
241 constexpr uint64_t kGroupAVX2 =
242 Bit(FeatureIndex::kAVX) | Bit(FeatureIndex::kAVX2) |
243 Bit(FeatureIndex::kLZCNT) | kGroupBMI2_FMA | kGroupF16C | kGroupSSE4;
245 constexpr uint64_t kGroupAVX3 =
246 Bit(FeatureIndex::kAVX512F) | Bit(FeatureIndex::kAVX512VL) |
247 Bit(FeatureIndex::kAVX512DQ) | Bit(FeatureIndex::kAVX512BW) |
248 Bit(FeatureIndex::kAVX512CD) | kGroupAVX2;
250 constexpr uint64_t kGroupAVX3_DL =
251 Bit(FeatureIndex::kVNNI) | Bit(FeatureIndex::kVPCLMULQDQ) |
252 Bit(FeatureIndex::kVBMI) | Bit(FeatureIndex::kVBMI2) |
253 Bit(FeatureIndex::kVAES) | Bit(FeatureIndex::kPOPCNTDQ) |
254 Bit(FeatureIndex::kBITALG) | Bit(FeatureIndex::kGFNI) | kGroupAVX3;
256 int64_t DetectTargets() {
257 int64_t bits = 0; // return value of supported targets.
258 #if HWY_ARCH_X86_64
259 bits |= HWY_SSE2; // always present in x64
260 #endif
262 const uint64_t flags = FlagsFromCPUID();
263 // Set target bit(s) if all their group's flags are all set.
264 if ((flags & kGroupAVX3_DL) == kGroupAVX3_DL) {
265 bits |= HWY_AVX3_DL;
267 if ((flags & kGroupAVX3) == kGroupAVX3) {
268 bits |= HWY_AVX3;
270 if ((flags & kGroupAVX2) == kGroupAVX2) {
271 bits |= HWY_AVX2;
273 if ((flags & kGroupSSE4) == kGroupSSE4) {
274 bits |= HWY_SSE4;
276 if ((flags & kGroupSSSE3) == kGroupSSSE3) {
277 bits |= HWY_SSSE3;
279 #if HWY_ARCH_X86_32
280 if ((flags & kGroupSSE2) == kGroupSSE2) {
281 bits |= HWY_SSE2;
283 #endif
285 // Clear bits if the OS does not support XSAVE - otherwise, registers
286 // are not preserved across context switches.
287 uint32_t abcd[4];
288 Cpuid(1, 0, abcd);
289 const bool has_osxsave = IsBitSet(abcd[2], 27);
290 if (has_osxsave) {
291 const uint32_t xcr0 = ReadXCR0();
292 const int64_t min_avx3 = HWY_AVX3 | HWY_AVX3_DL;
293 const int64_t min_avx2 = HWY_AVX2 | min_avx3;
294 // XMM
295 if (!IsBitSet(xcr0, 1)) {
296 #if HWY_ARCH_X86_64
297 // The HWY_SSE2, HWY_SSSE3, and HWY_SSE4 bits do not need to be
298 // cleared on x86_64, even if bit 1 of XCR0 is not set, as
299 // the lower 128 bits of XMM0-XMM15 are guaranteed to be
300 // preserved across context switches on x86_64
302 // Only clear the AVX2/AVX3 bits on x86_64 if bit 1 of XCR0 is not set
303 bits &= ~min_avx2;
304 #else
305 bits &= ~(HWY_SSE2 | HWY_SSSE3 | HWY_SSE4 | min_avx2);
306 #endif
308 // YMM
309 if (!IsBitSet(xcr0, 2)) {
310 bits &= ~min_avx2;
312 // opmask, ZMM lo/hi
313 if (!IsBitSet(xcr0, 5) || !IsBitSet(xcr0, 6) || !IsBitSet(xcr0, 7)) {
314 bits &= ~min_avx3;
316 } // has_osxsave
318 // This is mainly to work around the slow Zen4 CompressStore. It's unclear
319 // whether subsequent AMD models will be affected; assume yes.
320 if ((bits & HWY_AVX3_DL) && IsAMD()) {
321 bits |= HWY_AVX3_ZEN4;
324 return bits;
327 } // namespace x86
328 #elif HWY_ARCH_ARM && HWY_HAVE_RUNTIME_DISPATCH
329 namespace arm {
330 int64_t DetectTargets() {
331 int64_t bits = 0; // return value of supported targets.
332 using CapBits = unsigned long; // NOLINT
333 const CapBits hw = getauxval(AT_HWCAP);
334 (void)hw;
336 #if HWY_ARCH_ARM_A64
337 bits |= HWY_NEON_WITHOUT_AES; // aarch64 always has NEON and VFPv4..
339 // .. but not necessarily AES, which is required for HWY_NEON.
340 #if defined(HWCAP_AES)
341 if (hw & HWCAP_AES) {
342 bits |= HWY_NEON;
344 #endif // HWCAP_AES
346 #if defined(HWCAP_SVE)
347 if (hw & HWCAP_SVE) {
348 bits |= HWY_SVE;
350 #endif
352 #if defined(HWCAP2_SVE2) && defined(HWCAP2_SVEAES)
353 const CapBits hw2 = getauxval(AT_HWCAP2);
354 if ((hw2 & HWCAP2_SVE2) && (hw2 & HWCAP2_SVEAES)) {
355 bits |= HWY_SVE2;
357 #endif
359 #else // !HWY_ARCH_ARM_A64
361 // Some old auxv.h / hwcap.h do not define these. If not, treat as unsupported.
362 #if defined(HWCAP_NEON) && defined(HWCAP_VFPv4)
363 if ((hw & HWCAP_NEON) && (hw & HWCAP_VFPv4)) {
364 bits |= HWY_NEON_WITHOUT_AES;
366 #endif
368 // aarch32 would check getauxval(AT_HWCAP2) & HWCAP2_AES, but we do not yet
369 // support that platform, and Armv7 lacks AES entirely. Because HWY_NEON
370 // requires native AES instructions, we do not enable that target here.
372 #endif // HWY_ARCH_ARM_A64
373 return bits;
375 } // namespace arm
376 #elif HWY_ARCH_PPC && HWY_HAVE_RUNTIME_DISPATCH
377 namespace ppc {
379 #ifndef PPC_FEATURE_HAS_ALTIVEC
380 #define PPC_FEATURE_HAS_ALTIVEC 0x10000000
381 #endif
383 #ifndef PPC_FEATURE_HAS_VSX
384 #define PPC_FEATURE_HAS_VSX 0x00000080
385 #endif
387 #ifndef PPC_FEATURE2_ARCH_2_07
388 #define PPC_FEATURE2_ARCH_2_07 0x80000000
389 #endif
391 #ifndef PPC_FEATURE2_VEC_CRYPTO
392 #define PPC_FEATURE2_VEC_CRYPTO 0x02000000
393 #endif
395 #ifndef PPC_FEATURE2_ARCH_3_00
396 #define PPC_FEATURE2_ARCH_3_00 0x00800000
397 #endif
399 #ifndef PPC_FEATURE2_ARCH_3_1
400 #define PPC_FEATURE2_ARCH_3_1 0x00040000
401 #endif
403 using CapBits = unsigned long; // NOLINT
405 // For AT_HWCAP, the others are for AT_HWCAP2
406 constexpr CapBits kGroupVSX = PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_VSX;
408 #if defined(HWY_DISABLE_PPC8_CRYPTO)
409 constexpr CapBits kGroupPPC8 = PPC_FEATURE2_ARCH_2_07;
410 #else
411 constexpr CapBits kGroupPPC8 = PPC_FEATURE2_ARCH_2_07 | PPC_FEATURE2_VEC_CRYPTO;
412 #endif
413 constexpr CapBits kGroupPPC9 = kGroupPPC8 | PPC_FEATURE2_ARCH_3_00;
414 constexpr CapBits kGroupPPC10 = kGroupPPC9 | PPC_FEATURE2_ARCH_3_1;
416 int64_t DetectTargets() {
417 int64_t bits = 0; // return value of supported targets.
418 const CapBits hw = getauxval(AT_HWCAP);
420 if ((hw & kGroupVSX) == kGroupVSX) {
421 const CapBits hw2 = getauxval(AT_HWCAP2);
422 if ((hw2 & kGroupPPC8) == kGroupPPC8) {
423 bits |= HWY_PPC8;
425 if ((hw2 & kGroupPPC9) == kGroupPPC9) {
426 bits |= HWY_PPC9;
428 if ((hw2 & kGroupPPC10) == kGroupPPC10) {
429 bits |= HWY_PPC10;
431 } // VSX
432 return bits;
434 } // namespace ppc
435 #endif // HWY_ARCH_X86
437 // Returns targets supported by the CPU, independently of DisableTargets.
438 // Factored out of SupportedTargets to make its structure more obvious. Note
439 // that x86 CPUID may take several hundred cycles.
440 int64_t DetectTargets() {
441 // Apps will use only one of these (the default is EMU128), but compile flags
442 // for this TU may differ from that of the app, so allow both.
443 int64_t bits = HWY_SCALAR | HWY_EMU128;
445 #if HWY_ARCH_X86 && HWY_HAVE_RUNTIME_DISPATCH
446 bits |= x86::DetectTargets();
447 #elif HWY_ARCH_ARM && HWY_HAVE_RUNTIME_DISPATCH
448 bits |= arm::DetectTargets();
449 #elif HWY_ARCH_PPC && HWY_HAVE_RUNTIME_DISPATCH
450 bits |= ppc::DetectTargets();
452 #else
453 // TODO(janwas): detect support for WASM/RVV.
454 // This file is typically compiled without HWY_IS_TEST, but targets_test has
455 // it set, and will expect all of its HWY_TARGETS (= all attainable) to be
456 // supported.
457 bits |= HWY_ENABLED_BASELINE;
458 #endif // HWY_ARCH_*
460 if ((bits & HWY_ENABLED_BASELINE) != HWY_ENABLED_BASELINE) {
461 fprintf(stderr,
462 "WARNING: CPU supports %" PRIx64 " but software requires %" PRIx64
463 "\n",
464 bits, static_cast<int64_t>(HWY_ENABLED_BASELINE));
467 return bits;
470 } // namespace
472 HWY_DLLEXPORT HWY_NORETURN void HWY_FORMAT(3, 4)
473 Abort(const char* file, int line, const char* format, ...) {
474 char buf[2000];
475 va_list args;
476 va_start(args, format);
477 vsnprintf(buf, sizeof(buf), format, args);
478 va_end(args);
480 fprintf(stderr, "Abort at %s:%d: %s\n", file, line, buf);
482 // If compiled with any sanitizer, they can also print a stack trace.
483 #if HWY_IS_ASAN || HWY_IS_MSAN || HWY_IS_TSAN
484 __sanitizer_print_stack_trace();
485 #endif // HWY_IS_*
486 fflush(stderr);
488 // Now terminate the program:
489 #if HWY_ARCH_RVV
490 exit(1); // trap/abort just freeze Spike.
491 #elif HWY_IS_DEBUG_BUILD && !HWY_COMPILER_MSVC
492 // Facilitates breaking into a debugger, but don't use this in non-debug
493 // builds because it looks like "illegal instruction", which is misleading.
494 __builtin_trap();
495 #else
496 abort(); // Compile error without this due to HWY_NORETURN.
497 #endif
500 HWY_DLLEXPORT void DisableTargets(int64_t disabled_targets) {
501 supported_mask_ = static_cast<int64_t>(~disabled_targets);
502 // This will take effect on the next call to SupportedTargets, which is
503 // called right before GetChosenTarget::Update. However, calling Update here
504 // would make it appear that HWY_DYNAMIC_DISPATCH was called, which we want
505 // to check in tests. We instead de-initialize such that the next
506 // HWY_DYNAMIC_DISPATCH calls GetChosenTarget::Update via FunctionCache.
507 GetChosenTarget().DeInit();
510 HWY_DLLEXPORT void SetSupportedTargetsForTest(int64_t targets) {
511 supported_targets_for_test_ = targets;
512 GetChosenTarget().DeInit(); // see comment above
515 HWY_DLLEXPORT int64_t SupportedTargets() {
516 int64_t targets = supported_targets_for_test_;
517 if (HWY_LIKELY(targets == 0)) {
518 // Mock not active. Re-detect instead of caching just in case we're on a
519 // heterogeneous ISA (also requires some app support to pin threads). This
520 // is only reached on the first HWY_DYNAMIC_DISPATCH or after each call to
521 // DisableTargets or SetSupportedTargetsForTest.
522 targets = DetectTargets();
524 // VectorBytes invokes HWY_DYNAMIC_DISPATCH. To prevent infinite recursion,
525 // first set up ChosenTarget. No need to Update() again afterwards with the
526 // final targets - that will be done by a caller of this function.
527 GetChosenTarget().Update(targets);
529 // Now that we can call VectorBytes, check for targets with specific sizes.
530 if (HWY_ARCH_ARM_A64) {
531 const size_t vec_bytes = VectorBytes(); // uncached, see declaration
532 if ((targets & HWY_SVE) && vec_bytes == 32) {
533 targets = static_cast<int64_t>(targets | HWY_SVE_256);
534 } else {
535 targets = static_cast<int64_t>(targets & ~HWY_SVE_256);
537 if ((targets & HWY_SVE2) && vec_bytes == 16) {
538 targets = static_cast<int64_t>(targets | HWY_SVE2_128);
539 } else {
540 targets = static_cast<int64_t>(targets & ~HWY_SVE2_128);
542 } // HWY_ARCH_ARM_A64
545 targets &= supported_mask_;
546 return targets == 0 ? HWY_STATIC_TARGET : targets;
549 HWY_DLLEXPORT ChosenTarget& GetChosenTarget() {
550 static ChosenTarget chosen_target;
551 return chosen_target;
554 } // namespace hwy