1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "jit/ShuffleAnalysis.h"
#include "mozilla/MathAlgorithms.h"
#include "jit/MIR.h"
#include "wasm/WasmFeatures.h"

using namespace js;
using namespace jit;

using mozilla::Maybe;
using mozilla::Nothing;
using mozilla::Some;
18 #ifdef ENABLE_WASM_SIMD
20 // Specialization analysis for SIMD operations. This is still x86-centric but
21 // generalizes fairly easily to other architectures.
23 // Optimization of v8x16.shuffle. The general byte shuffle+blend is very
24 // expensive (equivalent to at least a dozen instructions), and we want to avoid
25 // that if we can. So look for special cases - there are many.
27 // The strategy is to sort the operation into one of three buckets depending
28 // on the shuffle pattern and inputs:
30 // - single operand; shuffles on these values are rotations, reversals,
31 // transpositions, and general permutations
32 // - single-operand-with-interesting-constant (especially zero); shuffles on
33 // these values are often byte shift or scatter operations
34 // - dual operand; shuffles on these operations are blends, catenated
35 // shifts, and (in the worst case) general shuffle+blends
37 // We're not trying to solve the general problem, only to lower reasonably
38 // expressed patterns that express common operations. Producers that produce
39 // dense and convoluted patterns will end up with the general byte shuffle.
// Producers that produce simpler patterns that easily map to hardware will
// get faster code.
//
43 // In particular, these matchers do not try to combine transformations, so a
44 // shuffle that optimally is lowered to rotate + permute32x4 + rotate, say, is
45 // usually going to end up as a general byte shuffle.
47 // Reduce a 0..31 byte mask to a 0..15 word mask if possible and if so return
48 // true, updating *control.
49 static bool ByteMaskToWordMask(SimdConstant
* control
) {
50 const SimdConstant::I8x16
& lanes
= control
->asInt8x16();
51 int16_t controlWords
[8];
52 for (int i
= 0; i
< 16; i
+= 2) {
53 if (!((lanes
[i
] & 1) == 0 && lanes
[i
+ 1] == lanes
[i
] + 1)) {
56 controlWords
[i
/ 2] = int16_t(lanes
[i
] / 2);
58 *control
= SimdConstant::CreateX8(controlWords
);
62 // Reduce a 0..31 byte mask to a 0..7 dword mask if possible and if so return
63 // true, updating *control.
64 static bool ByteMaskToDWordMask(SimdConstant
* control
) {
65 const SimdConstant::I8x16
& lanes
= control
->asInt8x16();
66 int32_t controlDWords
[4];
67 for (int i
= 0; i
< 16; i
+= 4) {
68 if (!((lanes
[i
] & 3) == 0 && lanes
[i
+ 1] == lanes
[i
] + 1 &&
69 lanes
[i
+ 2] == lanes
[i
] + 2 && lanes
[i
+ 3] == lanes
[i
] + 3)) {
72 controlDWords
[i
/ 4] = lanes
[i
] / 4;
74 *control
= SimdConstant::CreateX4(controlDWords
);
78 // Reduce a 0..31 byte mask to a 0..3 qword mask if possible and if so return
79 // true, updating *control.
80 static bool ByteMaskToQWordMask(SimdConstant
* control
) {
81 const SimdConstant::I8x16
& lanes
= control
->asInt8x16();
82 int64_t controlQWords
[2];
83 for (int i
= 0; i
< 16; i
+= 8) {
84 if (!((lanes
[i
] & 7) == 0 && lanes
[i
+ 1] == lanes
[i
] + 1 &&
85 lanes
[i
+ 2] == lanes
[i
] + 2 && lanes
[i
+ 3] == lanes
[i
] + 3 &&
86 lanes
[i
+ 4] == lanes
[i
] + 4 && lanes
[i
+ 5] == lanes
[i
] + 5 &&
87 lanes
[i
+ 6] == lanes
[i
] + 6 && lanes
[i
+ 7] == lanes
[i
] + 7)) {
90 controlQWords
[i
/ 8] = lanes
[i
] / 8;
92 *control
= SimdConstant::CreateX2(controlQWords
);
96 // Skip across consecutive values in lanes starting at i, returning the index
97 // after the last element. Lane values must be <= len-1 ("masked").
99 // Since every element is a 1-element run, the return value is never the same as
101 template <typename T
>
102 static int ScanIncreasingMasked(const T
* lanes
, int i
) {
103 int len
= int(16 / sizeof(T
));
105 MOZ_ASSERT(lanes
[i
] <= len
- 1);
107 while (i
< len
&& lanes
[i
] == lanes
[i
- 1] + 1) {
108 MOZ_ASSERT(lanes
[i
] <= len
- 1);
114 // Skip across consecutive values in lanes starting at i, returning the index
115 // after the last element. Lane values must be <= len*2-1 ("unmasked"); the
116 // values len-1 and len are not considered consecutive.
118 // Since every element is a 1-element run, the return value is never the same as
120 template <typename T
>
121 static int ScanIncreasingUnmasked(const T
* lanes
, int i
) {
122 int len
= int(16 / sizeof(T
));
124 if (lanes
[i
] < len
) {
126 while (i
< len
&& lanes
[i
] < len
&& lanes
[i
- 1] == lanes
[i
] - 1) {
131 while (i
< len
&& lanes
[i
] >= len
&& lanes
[i
- 1] == lanes
[i
] - 1) {
138 // Skip lanes that equal v starting at i, returning the index just beyond the
139 // last of those. There is no requirement that the initial lanes[i] == v.
140 template <typename T
>
141 static int ScanConstant(const T
* lanes
, int v
, int i
) {
142 int len
= int(16 / sizeof(T
));
143 MOZ_ASSERT(i
<= len
);
144 while (i
< len
&& lanes
[i
] == v
) {
// Mask lane values denoting rhs elements into lhs elements.
//
// Every lane of `input` is reduced modulo the lane count (16 / sizeof(T)) by
// and-ing with len-1, and the result is stored in the matching lane of
// `result`.
template <typename T>
static void MaskLanes(T* result, const T* input) {
  int len = int(16 / sizeof(T));
  for (int i = 0; i < len; i++) {
    result[i] = input[i] & (len - 1);
  }
}

159 // Apply a transformation to each lane value.
160 template <typename T
>
161 static void MapLanes(T
* result
, const T
* input
, int (*f
)(int)) {
162 // Hazard analysis trips on "IndirectCall: f" error.
163 // Suppress the check -- `f` is expected to be trivial here.
164 JS::AutoSuppressGCAnalysis nogc
;
166 int len
= int(16 / sizeof(T
));
167 for (int i
= 0; i
< len
; i
++) {
168 result
[i
] = f(input
[i
]);
// Recognize an identity permutation, assuming lanes is masked.
//
// This holds when the increasing run that starts at lane 0 covers the whole
// vector.
template <typename T>
static bool IsIdentity(const T* lanes) {
  return ScanIncreasingMasked(lanes, 0) == int(16 / sizeof(T));
}

// Recognize part of an identity permutation starting at start, with
// the first value of the permutation expected to be bias.
//
// Returns true iff lanes[start] == bias and each of the following len-1 lanes
// is one greater than its predecessor.
template <typename T>
static bool IsIdentity(const T* lanes, int start, int len, int bias) {
  if (lanes[start] != bias) {
    return false;
  }
  for (int i = start + 1; i < start + len; i++) {
    if (lanes[i] != lanes[i - 1] + 1) {
      return false;
    }
  }
  return true;
}

193 // We can permute by dwords if the mask is reducible to a dword mask, and in
194 // this case a single PSHUFD is enough.
195 static bool TryPermute32x4(SimdConstant
* control
) {
196 SimdConstant tmp
= *control
;
197 if (!ByteMaskToDWordMask(&tmp
)) {
204 // Can we perform a byte rotate right? We can use PALIGNR. The shift count is
205 // just lanes[0], and *control is unchanged.
206 static bool TryRotateRight8x16(SimdConstant
* control
) {
207 const SimdConstant::I8x16
& lanes
= control
->asInt8x16();
208 // Look for the end of the first run of consecutive bytes.
209 int i
= ScanIncreasingMasked(lanes
, 0);
211 // First run must start at a value s.t. we have a rotate if all remaining
213 if (lanes
[0] != 16 - i
) {
217 // If we reached the end of the vector, we're done.
222 // Second run must start at source lane zero.
227 // Second run must end at the end of the lane vector.
228 return ScanIncreasingMasked(lanes
, i
) == 16;
231 // We can permute by words if the mask is reducible to a word mask.
232 static bool TryPermute16x8(SimdConstant
* control
) {
233 SimdConstant tmp
= *control
;
234 if (!ByteMaskToWordMask(&tmp
)) {
241 // A single word lane is copied into all the other lanes: PSHUF*W + PSHUFD.
242 static bool TryBroadcast16x8(SimdConstant
* control
) {
243 SimdConstant tmp
= *control
;
244 if (!ByteMaskToWordMask(&tmp
)) {
247 const SimdConstant::I16x8
& lanes
= tmp
.asInt16x8();
248 if (ScanConstant(lanes
, lanes
[0], 0) < 8) {
255 // A single byte lane is copied int all the other lanes: PUNPCK*BW + PSHUF*W +
257 static bool TryBroadcast8x16(SimdConstant
* control
) {
258 const SimdConstant::I8x16
& lanes
= control
->asInt8x16();
259 return ScanConstant(lanes
, lanes
[0], 0) >= 16;
263 static bool TryReverse(SimdConstant
* control
) {
264 const SimdConstant::I8x16
& lanes
= control
->asInt8x16();
265 for (int i
= 0; i
< 16; i
++) {
266 if (lanes
[i
] != (i
^ (N
- 1))) {
273 // Look for permutations of a single operand.
274 static SimdPermuteOp
AnalyzePermute(SimdConstant
* control
) {
275 // Lane indices are input-agnostic for single-operand permutations.
276 SimdConstant::I8x16 controlBytes
;
277 MaskLanes(controlBytes
, control
->asInt8x16());
279 // Get rid of no-ops immediately, so nobody else needs to check.
280 if (IsIdentity(controlBytes
)) {
281 return SimdPermuteOp::MOVE
;
284 // Default control is the masked bytes.
285 *control
= SimdConstant::CreateX16(controlBytes
);
287 // Analysis order matters here and is architecture-dependent or even
288 // microarchitecture-dependent: ideally the cheapest implementation first.
289 // The Intel manual says that the cost of a PSHUFB is about five other
290 // operations, so make that our cutoff.
292 // Word, dword, and qword reversals are handled optimally by general permutes.
294 // Byte reversals are probably best left to PSHUFB, no alternative rendition
295 // seems to reliably go below five instructions. (Discuss.)
297 // Word swaps within doublewords and dword swaps within quadwords are handled
298 // optimally by general permutes.
300 // Dword and qword broadcasts are handled by dword permute.
302 if (TryPermute32x4(control
)) {
303 return SimdPermuteOp::PERMUTE_32x4
;
305 if (TryRotateRight8x16(control
)) {
306 return SimdPermuteOp::ROTATE_RIGHT_8x16
;
308 if (TryBroadcast16x8(control
)) {
309 return SimdPermuteOp::BROADCAST_16x8
;
311 if (TryPermute16x8(control
)) {
312 return SimdPermuteOp::PERMUTE_16x8
;
314 if (TryBroadcast8x16(control
)) {
315 return SimdPermuteOp::BROADCAST_8x16
;
317 if (TryReverse
<2>(control
)) {
318 return SimdPermuteOp::REVERSE_16x8
;
320 if (TryReverse
<4>(control
)) {
321 return SimdPermuteOp::REVERSE_32x4
;
323 if (TryReverse
<8>(control
)) {
324 return SimdPermuteOp::REVERSE_64x2
;
327 // TODO: (From v8) Unzip and transpose generally have renditions that slightly
328 // beat a general permute (three or four instructions)
330 // TODO: (From MacroAssemblerX86Shared::ShuffleX4): MOVLHPS and MOVHLPS can be
331 // used when merging two values.
333 // The default operation is to permute bytes with the default control.
334 return SimdPermuteOp::PERMUTE_8x16
;
337 // Can we shift the bytes left or right by a constant? A shift is a run of
338 // lanes from the rhs (which is zero) on one end and a run of values from the
339 // lhs on the other end.
340 static Maybe
<SimdPermuteOp
> TryShift8x16(SimdConstant
* control
) {
341 const SimdConstant::I8x16
& lanes
= control
->asInt8x16();
343 // Represent all zero lanes by 16
344 SimdConstant::I8x16 zeroesMasked
;
345 MapLanes(zeroesMasked
, lanes
, [](int x
) -> int { return x
>= 16 ? 16 : x
; });
347 int i
= ScanConstant(zeroesMasked
, 16, 0);
349 if (shiftLeft
> 0 && lanes
[shiftLeft
] != 0) {
353 i
= ScanIncreasingUnmasked(zeroesMasked
, i
);
354 int shiftRight
= 16 - i
;
355 if (shiftRight
> 0 && lanes
[i
- 1] != 15) {
359 i
= ScanConstant(zeroesMasked
, 16, i
);
360 if (i
< 16 || (shiftRight
> 0 && shiftLeft
> 0) ||
361 (shiftRight
== 0 && shiftLeft
== 0)) {
366 *control
= SimdConstant::SplatX16((int8_t)shiftRight
);
367 return Some(SimdPermuteOp::SHIFT_RIGHT_8x16
);
369 *control
= SimdConstant::SplatX16((int8_t)shiftLeft
);
370 return Some(SimdPermuteOp::SHIFT_LEFT_8x16
);
373 // Check if it is unsigned integer extend operation.
374 static Maybe
<SimdPermuteOp
> TryZeroExtend(SimdConstant
* control
) {
375 const SimdConstant::I8x16
& lanes
= control
->asInt8x16();
377 // Find fragment of sequantial lanes indices that starts from 0.
379 for (; i
<= 4 && lanes
[i
] == int8_t(i
); i
++) {
381 // The length of the fragment has to be a power of 2, and next item is zero.
382 if (!mozilla::IsPowerOfTwo(i
) || lanes
[i
] < 16) {
385 MOZ_ASSERT(i
> 0 && i
<= 4);
386 uint32_t fromLen
= i
;
387 // Skip items that will be zero'ed.
388 for (; i
<= 8 && lanes
[i
] >= 16; i
++) {
390 // The length of the entire fragment of zero and non-zero items
391 // needs to be power of 2.
392 if (!mozilla::IsPowerOfTwo(i
)) {
395 MOZ_ASSERT(i
> fromLen
&& i
<= 8);
398 // The sequence will repeat every toLen elements: in which first
399 // fromLen items are sequential lane indices, and the rest are zeros.
400 int8_t current
= int8_t(fromLen
);
401 for (; i
< 16; i
++) {
402 if ((i
% toLen
) >= fromLen
) {
403 // Expect the item be a zero.
408 // Check the item is in ascending sequence.
409 if (lanes
[i
] != current
) {
420 return Some(SimdPermuteOp::ZERO_EXTEND_8x16_TO_16x8
);
422 return Some(SimdPermuteOp::ZERO_EXTEND_8x16_TO_32x4
);
424 return Some(SimdPermuteOp::ZERO_EXTEND_8x16_TO_64x2
);
430 return Some(SimdPermuteOp::ZERO_EXTEND_16x8_TO_32x4
);
432 return Some(SimdPermuteOp::ZERO_EXTEND_16x8_TO_64x2
);
438 return Some(SimdPermuteOp::ZERO_EXTEND_32x4_TO_64x2
);
442 MOZ_CRASH("Invalid TryZeroExtend match");
445 static Maybe
<SimdPermuteOp
> AnalyzeShuffleWithZero(SimdConstant
* control
) {
446 Maybe
<SimdPermuteOp
> op
;
447 op
= TryShift8x16(control
);
452 op
= TryZeroExtend(control
);
457 // TODO: Optimization opportunity? A byte-blend-with-zero is just a CONST;
458 // PAND. This may beat the general byte blend code below.
462 // Concat: if the result is the suffix (high bytes) of the rhs in front of a
463 // prefix (low bytes) of the lhs then this is PALIGNR; ditto if the operands are
465 static Maybe
<SimdShuffleOp
> TryConcatRightShift8x16(SimdConstant
* control
,
466 bool* swapOperands
) {
467 const SimdConstant::I8x16
& lanes
= control
->asInt8x16();
468 int i
= ScanIncreasingUnmasked(lanes
, 0);
469 MOZ_ASSERT(i
< 16, "Single-operand run should have been handled elswhere");
470 // First run must end with 15 % 16
471 if ((lanes
[i
- 1] & 15) != 15) {
474 // Second run must start with 0 % 16
475 if ((lanes
[i
] & 15) != 0) {
478 // The two runs must come from different inputs
479 if ((lanes
[i
] & 16) == (lanes
[i
- 1] & 16)) {
482 int suffixLength
= i
;
484 i
= ScanIncreasingUnmasked(lanes
, i
);
485 // Must end at the left end
490 // If the suffix is from the lhs then swap the operands
492 *swapOperands
= !*swapOperands
;
494 *control
= SimdConstant::SplatX16((int8_t)suffixLength
);
495 return Some(SimdShuffleOp::CONCAT_RIGHT_SHIFT_8x16
);
498 // Blend words: if we pick words from both operands without a pattern but all
499 // the input words stay in their position then this is PBLENDW (immediate mask);
500 // this also handles all larger sizes on x64.
501 static Maybe
<SimdShuffleOp
> TryBlendInt16x8(SimdConstant
* control
) {
502 SimdConstant
tmp(*control
);
503 if (!ByteMaskToWordMask(&tmp
)) {
506 SimdConstant::I16x8 masked
;
507 MaskLanes(masked
, tmp
.asInt16x8());
508 if (!IsIdentity(masked
)) {
511 SimdConstant::I16x8 mapped
;
512 MapLanes(mapped
, tmp
.asInt16x8(),
513 [](int x
) -> int { return x
< 8 ? 0 : -1; });
514 *control
= SimdConstant::CreateX8(mapped
);
515 return Some(SimdShuffleOp::BLEND_16x8
);
518 // Blend bytes: if we pick bytes ditto then this is a byte blend, which can be
519 // handled with a CONST, PAND, PANDNOT, and POR.
521 // TODO: Optimization opportunity? If we pick all but one lanes from one with at
522 // most one from the other then it could be a MOV + PEXRB + PINSRB (also if this
523 // element is not in its source location).
524 static Maybe
<SimdShuffleOp
> TryBlendInt8x16(SimdConstant
* control
) {
525 SimdConstant::I8x16 masked
;
526 MaskLanes(masked
, control
->asInt8x16());
527 if (!IsIdentity(masked
)) {
530 SimdConstant::I8x16 mapped
;
531 MapLanes(mapped
, control
->asInt8x16(),
532 [](int x
) -> int { return x
< 16 ? 0 : -1; });
533 *control
= SimdConstant::CreateX16(mapped
);
534 return Some(SimdShuffleOp::BLEND_8x16
);
// Returns true iff the first 2*len lanes alternate between the sequences
// lhs, lhs+1, ... (even positions) and rhs, rhs+1, ... (odd positions).
template <typename T>
static bool MatchInterleave(const T* lanes, int lhs, int rhs, int len) {
  for (int i = 0; i < len; i++) {
    if (lanes[i * 2] != lhs + i || lanes[i * 2 + 1] != rhs + i) {
      return false;
    }
  }
  return true;
}

547 // Unpack/interleave:
548 // - if we interleave the low (bytes/words/doublewords) of the inputs into
549 // the output then this is UNPCKL*W (possibly with a swap of operands).
550 // - if we interleave the high ditto then it is UNPCKH*W (ditto)
551 template <typename T
>
552 static Maybe
<SimdShuffleOp
> TryInterleave(const T
* lanes
, int lhs
, int rhs
,
555 SimdShuffleOp highOp
) {
556 int len
= int(32 / (sizeof(T
) * 4));
557 if (MatchInterleave(lanes
, lhs
, rhs
, len
)) {
560 if (MatchInterleave(lanes
, rhs
, lhs
, len
)) {
561 *swapOperands
= !*swapOperands
;
564 if (MatchInterleave(lanes
, lhs
+ len
, rhs
+ len
, len
)) {
567 if (MatchInterleave(lanes
, rhs
+ len
, lhs
+ len
, len
)) {
568 *swapOperands
= !*swapOperands
;
574 static Maybe
<SimdShuffleOp
> TryInterleave64x2(SimdConstant
* control
,
575 bool* swapOperands
) {
576 SimdConstant tmp
= *control
;
577 if (!ByteMaskToQWordMask(&tmp
)) {
580 const SimdConstant::I64x2
& lanes
= tmp
.asInt64x2();
581 return TryInterleave(lanes
, 0, 2, swapOperands
,
582 SimdShuffleOp::INTERLEAVE_LOW_64x2
,
583 SimdShuffleOp::INTERLEAVE_HIGH_64x2
);
586 static Maybe
<SimdShuffleOp
> TryInterleave32x4(SimdConstant
* control
,
587 bool* swapOperands
) {
588 SimdConstant tmp
= *control
;
589 if (!ByteMaskToDWordMask(&tmp
)) {
592 const SimdConstant::I32x4
& lanes
= tmp
.asInt32x4();
593 return TryInterleave(lanes
, 0, 4, swapOperands
,
594 SimdShuffleOp::INTERLEAVE_LOW_32x4
,
595 SimdShuffleOp::INTERLEAVE_HIGH_32x4
);
598 static Maybe
<SimdShuffleOp
> TryInterleave16x8(SimdConstant
* control
,
599 bool* swapOperands
) {
600 SimdConstant tmp
= *control
;
601 if (!ByteMaskToWordMask(&tmp
)) {
604 const SimdConstant::I16x8
& lanes
= tmp
.asInt16x8();
605 return TryInterleave(lanes
, 0, 8, swapOperands
,
606 SimdShuffleOp::INTERLEAVE_LOW_16x8
,
607 SimdShuffleOp::INTERLEAVE_HIGH_16x8
);
610 static Maybe
<SimdShuffleOp
> TryInterleave8x16(SimdConstant
* control
,
611 bool* swapOperands
) {
612 const SimdConstant::I8x16
& lanes
= control
->asInt8x16();
613 return TryInterleave(lanes
, 0, 16, swapOperands
,
614 SimdShuffleOp::INTERLEAVE_LOW_8x16
,
615 SimdShuffleOp::INTERLEAVE_HIGH_8x16
);
618 static SimdShuffleOp
AnalyzeTwoArgShuffle(SimdConstant
* control
,
619 bool* swapOperands
) {
620 Maybe
<SimdShuffleOp
> op
;
621 op
= TryConcatRightShift8x16(control
, swapOperands
);
623 op
= TryBlendInt16x8(control
);
626 op
= TryBlendInt8x16(control
);
629 op
= TryInterleave64x2(control
, swapOperands
);
632 op
= TryInterleave32x4(control
, swapOperands
);
635 op
= TryInterleave16x8(control
, swapOperands
);
638 op
= TryInterleave8x16(control
, swapOperands
);
641 op
= Some(SimdShuffleOp::SHUFFLE_BLEND_8x16
);
646 // Reorder the operands if that seems useful, notably, move a constant to the
647 // right hand side. Rewrites the control to account for any move.
648 static bool MaybeReorderShuffleOperands(MDefinition
** lhs
, MDefinition
** rhs
,
649 SimdConstant
* control
) {
650 if ((*lhs
)->isWasmFloatConstant()) {
651 MDefinition
* tmp
= *lhs
;
655 int8_t controlBytes
[16];
656 const SimdConstant::I8x16
& lanes
= control
->asInt8x16();
657 for (unsigned i
= 0; i
< 16; i
++) {
658 controlBytes
[i
] = int8_t(lanes
[i
] ^ 16);
660 *control
= SimdConstant::CreateX16(controlBytes
);
#  ifdef DEBUG
// Report which specialization was chosen for a shuffle by passing a
// descriptive string to js::wasm::ReportSimdAnalysis, then return `s`
// unchanged so the call can wrap a return expression.  Crashes on an
// operation that has no report string.
static const SimdShuffle& ReportShuffleSpecialization(const SimdShuffle& s) {
  switch (s.opd) {
    case SimdShuffle::Operand::BOTH:
    case SimdShuffle::Operand::BOTH_SWAPPED:
      switch (*s.shuffleOp) {
        case SimdShuffleOp::SHUFFLE_BLEND_8x16:
          js::wasm::ReportSimdAnalysis("shuffle -> shuffle+blend 8x16");
          break;
        case SimdShuffleOp::BLEND_8x16:
          js::wasm::ReportSimdAnalysis("shuffle -> blend 8x16");
          break;
        case SimdShuffleOp::BLEND_16x8:
          js::wasm::ReportSimdAnalysis("shuffle -> blend 16x8");
          break;
        case SimdShuffleOp::CONCAT_RIGHT_SHIFT_8x16:
          js::wasm::ReportSimdAnalysis("shuffle -> concat+shift-right 8x16");
          break;
        case SimdShuffleOp::INTERLEAVE_HIGH_8x16:
          js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 8x16");
          break;
        case SimdShuffleOp::INTERLEAVE_HIGH_16x8:
          js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 16x8");
          break;
        case SimdShuffleOp::INTERLEAVE_HIGH_32x4:
          js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 32x4");
          break;
        case SimdShuffleOp::INTERLEAVE_HIGH_64x2:
          js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 64x2");
          break;
        case SimdShuffleOp::INTERLEAVE_LOW_8x16:
          js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 8x16");
          break;
        case SimdShuffleOp::INTERLEAVE_LOW_16x8:
          js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 16x8");
          break;
        case SimdShuffleOp::INTERLEAVE_LOW_32x4:
          js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 32x4");
          break;
        case SimdShuffleOp::INTERLEAVE_LOW_64x2:
          js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 64x2");
          break;
        default:
          MOZ_CRASH("Unexpected shuffle op");
      }
      break;
    case SimdShuffle::Operand::LEFT:
    case SimdShuffle::Operand::RIGHT:
      switch (*s.permuteOp) {
        case SimdPermuteOp::BROADCAST_8x16:
          js::wasm::ReportSimdAnalysis("shuffle -> broadcast 8x16");
          break;
        case SimdPermuteOp::BROADCAST_16x8:
          js::wasm::ReportSimdAnalysis("shuffle -> broadcast 16x8");
          break;
        case SimdPermuteOp::MOVE:
          js::wasm::ReportSimdAnalysis("shuffle -> move");
          break;
        case SimdPermuteOp::REVERSE_16x8:
          js::wasm::ReportSimdAnalysis(
              "shuffle -> reverse bytes in 16-bit lanes");
          break;
        case SimdPermuteOp::REVERSE_32x4:
          js::wasm::ReportSimdAnalysis(
              "shuffle -> reverse bytes in 32-bit lanes");
          break;
        case SimdPermuteOp::REVERSE_64x2:
          js::wasm::ReportSimdAnalysis(
              "shuffle -> reverse bytes in 64-bit lanes");
          break;
        case SimdPermuteOp::PERMUTE_8x16:
          js::wasm::ReportSimdAnalysis("shuffle -> permute 8x16");
          break;
        case SimdPermuteOp::PERMUTE_16x8:
          js::wasm::ReportSimdAnalysis("shuffle -> permute 16x8");
          break;
        case SimdPermuteOp::PERMUTE_32x4:
          js::wasm::ReportSimdAnalysis("shuffle -> permute 32x4");
          break;
        case SimdPermuteOp::ROTATE_RIGHT_8x16:
          js::wasm::ReportSimdAnalysis("shuffle -> rotate-right 8x16");
          break;
        case SimdPermuteOp::SHIFT_LEFT_8x16:
          js::wasm::ReportSimdAnalysis("shuffle -> shift-left 8x16");
          break;
        case SimdPermuteOp::SHIFT_RIGHT_8x16:
          js::wasm::ReportSimdAnalysis("shuffle -> shift-right 8x16");
          break;
        case SimdPermuteOp::ZERO_EXTEND_8x16_TO_16x8:
          js::wasm::ReportSimdAnalysis("shuffle -> zero-extend 8x16 to 16x8");
          break;
        case SimdPermuteOp::ZERO_EXTEND_8x16_TO_32x4:
          js::wasm::ReportSimdAnalysis("shuffle -> zero-extend 8x16 to 32x4");
          break;
        case SimdPermuteOp::ZERO_EXTEND_8x16_TO_64x2:
          js::wasm::ReportSimdAnalysis("shuffle -> zero-extend 8x16 to 64x2");
          break;
        case SimdPermuteOp::ZERO_EXTEND_16x8_TO_32x4:
          js::wasm::ReportSimdAnalysis("shuffle -> zero-extend 16x8 to 32x4");
          break;
        case SimdPermuteOp::ZERO_EXTEND_16x8_TO_64x2:
          js::wasm::ReportSimdAnalysis("shuffle -> zero-extend 16x8 to 64x2");
          break;
        case SimdPermuteOp::ZERO_EXTEND_32x4_TO_64x2:
          js::wasm::ReportSimdAnalysis("shuffle -> zero-extend 32x4 to 64x2");
          break;
        default:
          MOZ_CRASH("Unexpected permute op");
      }
      break;
  }
  return s;
}
#  endif  // DEBUG

782 SimdShuffle
jit::AnalyzeSimdShuffle(SimdConstant control
, MDefinition
* lhs
,
785 # define R(s) ReportShuffleSpecialization(s)
790 // If only one of the inputs is used, determine which.
792 bool useRight
= true;
796 bool allAbove
= true;
797 bool allBelow
= true;
798 const SimdConstant::I8x16
& lanes
= control
.asInt8x16();
799 for (int8_t i
: lanes
) {
800 allAbove
= allAbove
&& i
>= 16;
801 allBelow
= allBelow
&& i
< 16;
805 } else if (allBelow
) {
810 // Deal with one-ignored-input.
811 if (!(useLeft
&& useRight
)) {
812 SimdPermuteOp op
= AnalyzePermute(&control
);
813 return R(SimdShuffle::permute(
814 useLeft
? SimdShuffle::Operand::LEFT
: SimdShuffle::Operand::RIGHT
,
818 // Move constants to rhs.
819 bool swapOperands
= MaybeReorderShuffleOperands(&lhs
, &rhs
, &control
);
821 // Deal with constant rhs.
822 if (rhs
->isWasmFloatConstant()) {
823 SimdConstant rhsConstant
= rhs
->toWasmFloatConstant()->toSimd128();
824 if (rhsConstant
.isZeroBits()) {
825 Maybe
<SimdPermuteOp
> op
= AnalyzeShuffleWithZero(&control
);
827 return R(SimdShuffle::permute(swapOperands
? SimdShuffle::Operand::RIGHT
828 : SimdShuffle::Operand::LEFT
,
834 // Two operands both of which are used. If there's one constant operand it is
836 SimdShuffleOp op
= AnalyzeTwoArgShuffle(&control
, &swapOperands
);
837 return R(SimdShuffle::shuffle(swapOperands
838 ? SimdShuffle::Operand::BOTH_SWAPPED
839 : SimdShuffle::Operand::BOTH
,
844 #endif // ENABLE_WASM_SIMD