Bug 1874684 - Part 31: Correctly reject invalid durations in some RoundDuration calls...
[gecko.git] / js / src / jit / ShuffleAnalysis.cpp
blobeb39a776b27db2d25b47e67939048e3ec4be3486
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "jit/ShuffleAnalysis.h"
7 #include "mozilla/MathAlgorithms.h"
8 #include "jit/MIR.h"
9 #include "wasm/WasmFeatures.h"
11 using namespace js;
12 using namespace jit;
14 using mozilla::Maybe;
15 using mozilla::Nothing;
16 using mozilla::Some;
18 #ifdef ENABLE_WASM_SIMD
20 // Specialization analysis for SIMD operations. This is still x86-centric but
21 // generalizes fairly easily to other architectures.
23 // Optimization of v8x16.shuffle. The general byte shuffle+blend is very
24 // expensive (equivalent to at least a dozen instructions), and we want to avoid
25 // that if we can. So look for special cases - there are many.
27 // The strategy is to sort the operation into one of three buckets depending
28 // on the shuffle pattern and inputs:
30 // - single operand; shuffles on these values are rotations, reversals,
31 // transpositions, and general permutations
32 // - single-operand-with-interesting-constant (especially zero); shuffles on
33 // these values are often byte shift or scatter operations
34 // - dual operand; shuffles on these operations are blends, catenated
35 // shifts, and (in the worst case) general shuffle+blends
37 // We're not trying to solve the general problem, only to lower reasonably
38 // expressed patterns that express common operations. Producers that produce
39 // dense and convoluted patterns will end up with the general byte shuffle.
40 // Producers that produce simpler patterns that easily map to hardware will
41 // get faster code.
43 // In particular, these matchers do not try to combine transformations, so a
44 // shuffle that optimally is lowered to rotate + permute32x4 + rotate, say, is
45 // usually going to end up as a general byte shuffle.
47 // Reduce a 0..31 byte mask to a 0..15 word mask if possible and if so return
48 // true, updating *control.
49 static bool ByteMaskToWordMask(SimdConstant* control) {
50 const SimdConstant::I8x16& lanes = control->asInt8x16();
51 int16_t controlWords[8];
52 for (int i = 0; i < 16; i += 2) {
53 if (!((lanes[i] & 1) == 0 && lanes[i + 1] == lanes[i] + 1)) {
54 return false;
56 controlWords[i / 2] = int16_t(lanes[i] / 2);
58 *control = SimdConstant::CreateX8(controlWords);
59 return true;
62 // Reduce a 0..31 byte mask to a 0..7 dword mask if possible and if so return
63 // true, updating *control.
64 static bool ByteMaskToDWordMask(SimdConstant* control) {
65 const SimdConstant::I8x16& lanes = control->asInt8x16();
66 int32_t controlDWords[4];
67 for (int i = 0; i < 16; i += 4) {
68 if (!((lanes[i] & 3) == 0 && lanes[i + 1] == lanes[i] + 1 &&
69 lanes[i + 2] == lanes[i] + 2 && lanes[i + 3] == lanes[i] + 3)) {
70 return false;
72 controlDWords[i / 4] = lanes[i] / 4;
74 *control = SimdConstant::CreateX4(controlDWords);
75 return true;
78 // Reduce a 0..31 byte mask to a 0..3 qword mask if possible and if so return
79 // true, updating *control.
80 static bool ByteMaskToQWordMask(SimdConstant* control) {
81 const SimdConstant::I8x16& lanes = control->asInt8x16();
82 int64_t controlQWords[2];
83 for (int i = 0; i < 16; i += 8) {
84 if (!((lanes[i] & 7) == 0 && lanes[i + 1] == lanes[i] + 1 &&
85 lanes[i + 2] == lanes[i] + 2 && lanes[i + 3] == lanes[i] + 3 &&
86 lanes[i + 4] == lanes[i] + 4 && lanes[i + 5] == lanes[i] + 5 &&
87 lanes[i + 6] == lanes[i] + 6 && lanes[i + 7] == lanes[i] + 7)) {
88 return false;
90 controlQWords[i / 8] = lanes[i] / 8;
92 *control = SimdConstant::CreateX2(controlQWords);
93 return true;
96 // Skip across consecutive values in lanes starting at i, returning the index
97 // after the last element. Lane values must be <= len-1 ("masked").
99 // Since every element is a 1-element run, the return value is never the same as
100 // the starting i.
101 template <typename T>
102 static int ScanIncreasingMasked(const T* lanes, int i) {
103 int len = int(16 / sizeof(T));
104 MOZ_ASSERT(i < len);
105 MOZ_ASSERT(lanes[i] <= len - 1);
106 i++;
107 while (i < len && lanes[i] == lanes[i - 1] + 1) {
108 MOZ_ASSERT(lanes[i] <= len - 1);
109 i++;
111 return i;
114 // Skip across consecutive values in lanes starting at i, returning the index
115 // after the last element. Lane values must be <= len*2-1 ("unmasked"); the
116 // values len-1 and len are not considered consecutive.
118 // Since every element is a 1-element run, the return value is never the same as
119 // the starting i.
120 template <typename T>
121 static int ScanIncreasingUnmasked(const T* lanes, int i) {
122 int len = int(16 / sizeof(T));
123 MOZ_ASSERT(i < len);
124 if (lanes[i] < len) {
125 i++;
126 while (i < len && lanes[i] < len && lanes[i - 1] == lanes[i] - 1) {
127 i++;
129 } else {
130 i++;
131 while (i < len && lanes[i] >= len && lanes[i - 1] == lanes[i] - 1) {
132 i++;
135 return i;
138 // Skip lanes that equal v starting at i, returning the index just beyond the
139 // last of those. There is no requirement that the initial lanes[i] == v.
140 template <typename T>
141 static int ScanConstant(const T* lanes, int v, int i) {
142 int len = int(16 / sizeof(T));
143 MOZ_ASSERT(i <= len);
144 while (i < len && lanes[i] == v) {
145 i++;
147 return i;
// Fold lane indices that denote rhs elements onto the corresponding lhs
// indices by keeping only the low bits: each output lane is the input lane
// AND-ed with (lane count - 1), where the lane count is 16 / sizeof(T).
template <typename T>
static void MaskLanes(T* result, const T* input) {
  const int laneCount = int(16 / sizeof(T));
  const int laneMask = laneCount - 1;
  for (int lane = 0; lane != laneCount; ++lane) {
    result[lane] = input[lane] & laneMask;
  }
}
159 // Apply a transformation to each lane value.
160 template <typename T>
161 static void MapLanes(T* result, const T* input, int (*f)(int)) {
162 // Hazard analysis trips on "IndirectCall: f" error.
163 // Suppress the check -- `f` is expected to be trivial here.
164 JS::AutoSuppressGCAnalysis nogc;
166 int len = int(16 / sizeof(T));
167 for (int i = 0; i < len; i++) {
168 result[i] = f(input[i]);
// True if the increasing run that begins at lane 0 covers every lane.  For
// masked lanes (all values below the lane count) that means the mask is the
// identity permutation.
template <typename T>
static bool IsIdentity(const T* lanes) {
  const int laneCount = int(16 / sizeof(T));
  const int runEnd = ScanIncreasingMasked(lanes, 0);
  return runEnd == laneCount;
}
// True if lanes[start] equals bias and each of the following len-1 lanes is
// one greater than its predecessor, i.e. the window [start, start+len) is a
// slice of an identity permutation whose first value is bias.  The first lane
// is always checked, even when len is less than one.
template <typename T>
static bool IsIdentity(const T* lanes, int start, int len, int bias) {
  const T* cur = lanes + start;
  if (*cur != bias) {
    return false;
  }
  for (int remaining = len - 1; remaining > 0; remaining--, cur++) {
    if (cur[1] != cur[0] + 1) {
      return false;
    }
  }
  return true;
}
193 // We can permute by dwords if the mask is reducible to a dword mask, and in
194 // this case a single PSHUFD is enough.
195 static bool TryPermute32x4(SimdConstant* control) {
196 SimdConstant tmp = *control;
197 if (!ByteMaskToDWordMask(&tmp)) {
198 return false;
200 *control = tmp;
201 return true;
204 // Can we perform a byte rotate right? We can use PALIGNR. The shift count is
205 // just lanes[0], and *control is unchanged.
206 static bool TryRotateRight8x16(SimdConstant* control) {
207 const SimdConstant::I8x16& lanes = control->asInt8x16();
208 // Look for the end of the first run of consecutive bytes.
209 int i = ScanIncreasingMasked(lanes, 0);
211 // First run must start at a value s.t. we have a rotate if all remaining
212 // bytes are a run.
213 if (lanes[0] != 16 - i) {
214 return false;
217 // If we reached the end of the vector, we're done.
218 if (i == 16) {
219 return true;
222 // Second run must start at source lane zero.
223 if (lanes[i] != 0) {
224 return false;
227 // Second run must end at the end of the lane vector.
228 return ScanIncreasingMasked(lanes, i) == 16;
231 // We can permute by words if the mask is reducible to a word mask.
232 static bool TryPermute16x8(SimdConstant* control) {
233 SimdConstant tmp = *control;
234 if (!ByteMaskToWordMask(&tmp)) {
235 return false;
237 *control = tmp;
238 return true;
241 // A single word lane is copied into all the other lanes: PSHUF*W + PSHUFD.
242 static bool TryBroadcast16x8(SimdConstant* control) {
243 SimdConstant tmp = *control;
244 if (!ByteMaskToWordMask(&tmp)) {
245 return false;
247 const SimdConstant::I16x8& lanes = tmp.asInt16x8();
248 if (ScanConstant(lanes, lanes[0], 0) < 8) {
249 return false;
251 *control = tmp;
252 return true;
255 // A single byte lane is copied int all the other lanes: PUNPCK*BW + PSHUF*W +
256 // PSHUFD.
257 static bool TryBroadcast8x16(SimdConstant* control) {
258 const SimdConstant::I8x16& lanes = control->asInt8x16();
259 return ScanConstant(lanes, lanes[0], 0) >= 16;
262 template <int N>
263 static bool TryReverse(SimdConstant* control) {
264 const SimdConstant::I8x16& lanes = control->asInt8x16();
265 for (int i = 0; i < 16; i++) {
266 if (lanes[i] != (i ^ (N - 1))) {
267 return false;
270 return true;
273 // Look for permutations of a single operand.
274 static SimdPermuteOp AnalyzePermute(SimdConstant* control) {
275 // Lane indices are input-agnostic for single-operand permutations.
276 SimdConstant::I8x16 controlBytes;
277 MaskLanes(controlBytes, control->asInt8x16());
279 // Get rid of no-ops immediately, so nobody else needs to check.
280 if (IsIdentity(controlBytes)) {
281 return SimdPermuteOp::MOVE;
284 // Default control is the masked bytes.
285 *control = SimdConstant::CreateX16(controlBytes);
287 // Analysis order matters here and is architecture-dependent or even
288 // microarchitecture-dependent: ideally the cheapest implementation first.
289 // The Intel manual says that the cost of a PSHUFB is about five other
290 // operations, so make that our cutoff.
292 // Word, dword, and qword reversals are handled optimally by general permutes.
294 // Byte reversals are probably best left to PSHUFB, no alternative rendition
295 // seems to reliably go below five instructions. (Discuss.)
297 // Word swaps within doublewords and dword swaps within quadwords are handled
298 // optimally by general permutes.
300 // Dword and qword broadcasts are handled by dword permute.
302 if (TryPermute32x4(control)) {
303 return SimdPermuteOp::PERMUTE_32x4;
305 if (TryRotateRight8x16(control)) {
306 return SimdPermuteOp::ROTATE_RIGHT_8x16;
308 if (TryBroadcast16x8(control)) {
309 return SimdPermuteOp::BROADCAST_16x8;
311 if (TryPermute16x8(control)) {
312 return SimdPermuteOp::PERMUTE_16x8;
314 if (TryBroadcast8x16(control)) {
315 return SimdPermuteOp::BROADCAST_8x16;
317 if (TryReverse<2>(control)) {
318 return SimdPermuteOp::REVERSE_16x8;
320 if (TryReverse<4>(control)) {
321 return SimdPermuteOp::REVERSE_32x4;
323 if (TryReverse<8>(control)) {
324 return SimdPermuteOp::REVERSE_64x2;
327 // TODO: (From v8) Unzip and transpose generally have renditions that slightly
328 // beat a general permute (three or four instructions)
330 // TODO: (From MacroAssemblerX86Shared::ShuffleX4): MOVLHPS and MOVHLPS can be
331 // used when merging two values.
333 // The default operation is to permute bytes with the default control.
334 return SimdPermuteOp::PERMUTE_8x16;
337 // Can we shift the bytes left or right by a constant? A shift is a run of
338 // lanes from the rhs (which is zero) on one end and a run of values from the
339 // lhs on the other end.
340 static Maybe<SimdPermuteOp> TryShift8x16(SimdConstant* control) {
341 const SimdConstant::I8x16& lanes = control->asInt8x16();
343 // Represent all zero lanes by 16
344 SimdConstant::I8x16 zeroesMasked;
345 MapLanes(zeroesMasked, lanes, [](int x) -> int { return x >= 16 ? 16 : x; });
347 int i = ScanConstant(zeroesMasked, 16, 0);
348 int shiftLeft = i;
349 if (shiftLeft > 0 && lanes[shiftLeft] != 0) {
350 return Nothing();
353 i = ScanIncreasingUnmasked(zeroesMasked, i);
354 int shiftRight = 16 - i;
355 if (shiftRight > 0 && lanes[i - 1] != 15) {
356 return Nothing();
359 i = ScanConstant(zeroesMasked, 16, i);
360 if (i < 16 || (shiftRight > 0 && shiftLeft > 0) ||
361 (shiftRight == 0 && shiftLeft == 0)) {
362 return Nothing();
365 if (shiftRight) {
366 *control = SimdConstant::SplatX16((int8_t)shiftRight);
367 return Some(SimdPermuteOp::SHIFT_RIGHT_8x16);
369 *control = SimdConstant::SplatX16((int8_t)shiftLeft);
370 return Some(SimdPermuteOp::SHIFT_LEFT_8x16);
373 // Check if it is unsigned integer extend operation.
374 static Maybe<SimdPermuteOp> TryZeroExtend(SimdConstant* control) {
375 const SimdConstant::I8x16& lanes = control->asInt8x16();
377 // Find fragment of sequantial lanes indices that starts from 0.
378 uint32_t i = 0;
379 for (; i <= 4 && lanes[i] == int8_t(i); i++) {
381 // The length of the fragment has to be a power of 2, and next item is zero.
382 if (!mozilla::IsPowerOfTwo(i) || lanes[i] < 16) {
383 return Nothing();
385 MOZ_ASSERT(i > 0 && i <= 4);
386 uint32_t fromLen = i;
387 // Skip items that will be zero'ed.
388 for (; i <= 8 && lanes[i] >= 16; i++) {
390 // The length of the entire fragment of zero and non-zero items
391 // needs to be power of 2.
392 if (!mozilla::IsPowerOfTwo(i)) {
393 return Nothing();
395 MOZ_ASSERT(i > fromLen && i <= 8);
396 uint32_t toLen = i;
398 // The sequence will repeat every toLen elements: in which first
399 // fromLen items are sequential lane indices, and the rest are zeros.
400 int8_t current = int8_t(fromLen);
401 for (; i < 16; i++) {
402 if ((i % toLen) >= fromLen) {
403 // Expect the item be a zero.
404 if (lanes[i] < 16) {
405 return Nothing();
407 } else {
408 // Check the item is in ascending sequence.
409 if (lanes[i] != current) {
410 return Nothing();
412 current++;
416 switch (fromLen) {
417 case 1:
418 switch (toLen) {
419 case 2:
420 return Some(SimdPermuteOp::ZERO_EXTEND_8x16_TO_16x8);
421 case 4:
422 return Some(SimdPermuteOp::ZERO_EXTEND_8x16_TO_32x4);
423 case 8:
424 return Some(SimdPermuteOp::ZERO_EXTEND_8x16_TO_64x2);
426 break;
427 case 2:
428 switch (toLen) {
429 case 4:
430 return Some(SimdPermuteOp::ZERO_EXTEND_16x8_TO_32x4);
431 case 8:
432 return Some(SimdPermuteOp::ZERO_EXTEND_16x8_TO_64x2);
434 break;
435 case 4:
436 switch (toLen) {
437 case 8:
438 return Some(SimdPermuteOp::ZERO_EXTEND_32x4_TO_64x2);
440 break;
442 MOZ_CRASH("Invalid TryZeroExtend match");
445 static Maybe<SimdPermuteOp> AnalyzeShuffleWithZero(SimdConstant* control) {
446 Maybe<SimdPermuteOp> op;
447 op = TryShift8x16(control);
448 if (op) {
449 return op;
452 op = TryZeroExtend(control);
453 if (op) {
454 return op;
457 // TODO: Optimization opportunity? A byte-blend-with-zero is just a CONST;
458 // PAND. This may beat the general byte blend code below.
459 return Nothing();
462 // Concat: if the result is the suffix (high bytes) of the rhs in front of a
463 // prefix (low bytes) of the lhs then this is PALIGNR; ditto if the operands are
464 // swapped.
465 static Maybe<SimdShuffleOp> TryConcatRightShift8x16(SimdConstant* control,
466 bool* swapOperands) {
467 const SimdConstant::I8x16& lanes = control->asInt8x16();
468 int i = ScanIncreasingUnmasked(lanes, 0);
469 MOZ_ASSERT(i < 16, "Single-operand run should have been handled elswhere");
470 // First run must end with 15 % 16
471 if ((lanes[i - 1] & 15) != 15) {
472 return Nothing();
474 // Second run must start with 0 % 16
475 if ((lanes[i] & 15) != 0) {
476 return Nothing();
478 // The two runs must come from different inputs
479 if ((lanes[i] & 16) == (lanes[i - 1] & 16)) {
480 return Nothing();
482 int suffixLength = i;
484 i = ScanIncreasingUnmasked(lanes, i);
485 // Must end at the left end
486 if (i != 16) {
487 return Nothing();
490 // If the suffix is from the lhs then swap the operands
491 if (lanes[0] < 16) {
492 *swapOperands = !*swapOperands;
494 *control = SimdConstant::SplatX16((int8_t)suffixLength);
495 return Some(SimdShuffleOp::CONCAT_RIGHT_SHIFT_8x16);
498 // Blend words: if we pick words from both operands without a pattern but all
499 // the input words stay in their position then this is PBLENDW (immediate mask);
500 // this also handles all larger sizes on x64.
501 static Maybe<SimdShuffleOp> TryBlendInt16x8(SimdConstant* control) {
502 SimdConstant tmp(*control);
503 if (!ByteMaskToWordMask(&tmp)) {
504 return Nothing();
506 SimdConstant::I16x8 masked;
507 MaskLanes(masked, tmp.asInt16x8());
508 if (!IsIdentity(masked)) {
509 return Nothing();
511 SimdConstant::I16x8 mapped;
512 MapLanes(mapped, tmp.asInt16x8(),
513 [](int x) -> int { return x < 8 ? 0 : -1; });
514 *control = SimdConstant::CreateX8(mapped);
515 return Some(SimdShuffleOp::BLEND_16x8);
518 // Blend bytes: if we pick bytes ditto then this is a byte blend, which can be
519 // handled with a CONST, PAND, PANDNOT, and POR.
521 // TODO: Optimization opportunity? If we pick all but one lanes from one with at
522 // most one from the other then it could be a MOV + PEXRB + PINSRB (also if this
523 // element is not in its source location).
524 static Maybe<SimdShuffleOp> TryBlendInt8x16(SimdConstant* control) {
525 SimdConstant::I8x16 masked;
526 MaskLanes(masked, control->asInt8x16());
527 if (!IsIdentity(masked)) {
528 return Nothing();
530 SimdConstant::I8x16 mapped;
531 MapLanes(mapped, control->asInt8x16(),
532 [](int x) -> int { return x < 16 ? 0 : -1; });
533 *control = SimdConstant::CreateX16(mapped);
534 return Some(SimdShuffleOp::BLEND_8x16);
// True if the first 2*len lanes alternate between two increasing sequences:
// even positions hold lhs, lhs+1, ... and odd positions hold rhs, rhs+1, ...
template <typename T>
static bool MatchInterleave(const T* lanes, int lhs, int rhs, int len) {
  const T* pair = lanes;
  for (int k = 0; k < len; k++, pair += 2) {
    const bool matches = pair[0] == lhs + k && pair[1] == rhs + k;
    if (!matches) {
      return false;
    }
  }
  return true;
}
547 // Unpack/interleave:
548 // - if we interleave the low (bytes/words/doublewords) of the inputs into
549 // the output then this is UNPCKL*W (possibly with a swap of operands).
550 // - if we interleave the high ditto then it is UNPCKH*W (ditto)
551 template <typename T>
552 static Maybe<SimdShuffleOp> TryInterleave(const T* lanes, int lhs, int rhs,
553 bool* swapOperands,
554 SimdShuffleOp lowOp,
555 SimdShuffleOp highOp) {
556 int len = int(32 / (sizeof(T) * 4));
557 if (MatchInterleave(lanes, lhs, rhs, len)) {
558 return Some(lowOp);
560 if (MatchInterleave(lanes, rhs, lhs, len)) {
561 *swapOperands = !*swapOperands;
562 return Some(lowOp);
564 if (MatchInterleave(lanes, lhs + len, rhs + len, len)) {
565 return Some(highOp);
567 if (MatchInterleave(lanes, rhs + len, lhs + len, len)) {
568 *swapOperands = !*swapOperands;
569 return Some(highOp);
571 return Nothing();
574 static Maybe<SimdShuffleOp> TryInterleave64x2(SimdConstant* control,
575 bool* swapOperands) {
576 SimdConstant tmp = *control;
577 if (!ByteMaskToQWordMask(&tmp)) {
578 return Nothing();
580 const SimdConstant::I64x2& lanes = tmp.asInt64x2();
581 return TryInterleave(lanes, 0, 2, swapOperands,
582 SimdShuffleOp::INTERLEAVE_LOW_64x2,
583 SimdShuffleOp::INTERLEAVE_HIGH_64x2);
586 static Maybe<SimdShuffleOp> TryInterleave32x4(SimdConstant* control,
587 bool* swapOperands) {
588 SimdConstant tmp = *control;
589 if (!ByteMaskToDWordMask(&tmp)) {
590 return Nothing();
592 const SimdConstant::I32x4& lanes = tmp.asInt32x4();
593 return TryInterleave(lanes, 0, 4, swapOperands,
594 SimdShuffleOp::INTERLEAVE_LOW_32x4,
595 SimdShuffleOp::INTERLEAVE_HIGH_32x4);
598 static Maybe<SimdShuffleOp> TryInterleave16x8(SimdConstant* control,
599 bool* swapOperands) {
600 SimdConstant tmp = *control;
601 if (!ByteMaskToWordMask(&tmp)) {
602 return Nothing();
604 const SimdConstant::I16x8& lanes = tmp.asInt16x8();
605 return TryInterleave(lanes, 0, 8, swapOperands,
606 SimdShuffleOp::INTERLEAVE_LOW_16x8,
607 SimdShuffleOp::INTERLEAVE_HIGH_16x8);
610 static Maybe<SimdShuffleOp> TryInterleave8x16(SimdConstant* control,
611 bool* swapOperands) {
612 const SimdConstant::I8x16& lanes = control->asInt8x16();
613 return TryInterleave(lanes, 0, 16, swapOperands,
614 SimdShuffleOp::INTERLEAVE_LOW_8x16,
615 SimdShuffleOp::INTERLEAVE_HIGH_8x16);
618 static SimdShuffleOp AnalyzeTwoArgShuffle(SimdConstant* control,
619 bool* swapOperands) {
620 Maybe<SimdShuffleOp> op;
621 op = TryConcatRightShift8x16(control, swapOperands);
622 if (!op) {
623 op = TryBlendInt16x8(control);
625 if (!op) {
626 op = TryBlendInt8x16(control);
628 if (!op) {
629 op = TryInterleave64x2(control, swapOperands);
631 if (!op) {
632 op = TryInterleave32x4(control, swapOperands);
634 if (!op) {
635 op = TryInterleave16x8(control, swapOperands);
637 if (!op) {
638 op = TryInterleave8x16(control, swapOperands);
640 if (!op) {
641 op = Some(SimdShuffleOp::SHUFFLE_BLEND_8x16);
643 return *op;
646 // Reorder the operands if that seems useful, notably, move a constant to the
647 // right hand side. Rewrites the control to account for any move.
648 static bool MaybeReorderShuffleOperands(MDefinition** lhs, MDefinition** rhs,
649 SimdConstant* control) {
650 if ((*lhs)->isWasmFloatConstant()) {
651 MDefinition* tmp = *lhs;
652 *lhs = *rhs;
653 *rhs = tmp;
655 int8_t controlBytes[16];
656 const SimdConstant::I8x16& lanes = control->asInt8x16();
657 for (unsigned i = 0; i < 16; i++) {
658 controlBytes[i] = int8_t(lanes[i] ^ 16);
660 *control = SimdConstant::CreateX16(controlBytes);
662 return true;
664 return false;
667 # ifdef DEBUG
668 static const SimdShuffle& ReportShuffleSpecialization(const SimdShuffle& s) {
669 switch (s.opd) {
670 case SimdShuffle::Operand::BOTH:
671 case SimdShuffle::Operand::BOTH_SWAPPED:
672 switch (*s.shuffleOp) {
673 case SimdShuffleOp::SHUFFLE_BLEND_8x16:
674 js::wasm::ReportSimdAnalysis("shuffle -> shuffle+blend 8x16");
675 break;
676 case SimdShuffleOp::BLEND_8x16:
677 js::wasm::ReportSimdAnalysis("shuffle -> blend 8x16");
678 break;
679 case SimdShuffleOp::BLEND_16x8:
680 js::wasm::ReportSimdAnalysis("shuffle -> blend 16x8");
681 break;
682 case SimdShuffleOp::CONCAT_RIGHT_SHIFT_8x16:
683 js::wasm::ReportSimdAnalysis("shuffle -> concat+shift-right 8x16");
684 break;
685 case SimdShuffleOp::INTERLEAVE_HIGH_8x16:
686 js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 8x16");
687 break;
688 case SimdShuffleOp::INTERLEAVE_HIGH_16x8:
689 js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 16x8");
690 break;
691 case SimdShuffleOp::INTERLEAVE_HIGH_32x4:
692 js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 32x4");
693 break;
694 case SimdShuffleOp::INTERLEAVE_HIGH_64x2:
695 js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 64x2");
696 break;
697 case SimdShuffleOp::INTERLEAVE_LOW_8x16:
698 js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 8x16");
699 break;
700 case SimdShuffleOp::INTERLEAVE_LOW_16x8:
701 js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 16x8");
702 break;
703 case SimdShuffleOp::INTERLEAVE_LOW_32x4:
704 js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 32x4");
705 break;
706 case SimdShuffleOp::INTERLEAVE_LOW_64x2:
707 js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 64x2");
708 break;
709 default:
710 MOZ_CRASH("Unexpected shuffle op");
712 break;
713 case SimdShuffle::Operand::LEFT:
714 case SimdShuffle::Operand::RIGHT:
715 switch (*s.permuteOp) {
716 case SimdPermuteOp::BROADCAST_8x16:
717 js::wasm::ReportSimdAnalysis("shuffle -> broadcast 8x16");
718 break;
719 case SimdPermuteOp::BROADCAST_16x8:
720 js::wasm::ReportSimdAnalysis("shuffle -> broadcast 16x8");
721 break;
722 case SimdPermuteOp::MOVE:
723 js::wasm::ReportSimdAnalysis("shuffle -> move");
724 break;
725 case SimdPermuteOp::REVERSE_16x8:
726 js::wasm::ReportSimdAnalysis(
727 "shuffle -> reverse bytes in 16-bit lanes");
728 break;
729 case SimdPermuteOp::REVERSE_32x4:
730 js::wasm::ReportSimdAnalysis(
731 "shuffle -> reverse bytes in 32-bit lanes");
732 break;
733 case SimdPermuteOp::REVERSE_64x2:
734 js::wasm::ReportSimdAnalysis(
735 "shuffle -> reverse bytes in 64-bit lanes");
736 break;
737 case SimdPermuteOp::PERMUTE_8x16:
738 js::wasm::ReportSimdAnalysis("shuffle -> permute 8x16");
739 break;
740 case SimdPermuteOp::PERMUTE_16x8:
741 js::wasm::ReportSimdAnalysis("shuffle -> permute 16x8");
742 break;
743 case SimdPermuteOp::PERMUTE_32x4:
744 js::wasm::ReportSimdAnalysis("shuffle -> permute 32x4");
745 break;
746 case SimdPermuteOp::ROTATE_RIGHT_8x16:
747 js::wasm::ReportSimdAnalysis("shuffle -> rotate-right 8x16");
748 break;
749 case SimdPermuteOp::SHIFT_LEFT_8x16:
750 js::wasm::ReportSimdAnalysis("shuffle -> shift-left 8x16");
751 break;
752 case SimdPermuteOp::SHIFT_RIGHT_8x16:
753 js::wasm::ReportSimdAnalysis("shuffle -> shift-right 8x16");
754 break;
755 case SimdPermuteOp::ZERO_EXTEND_8x16_TO_16x8:
756 js::wasm::ReportSimdAnalysis("shuffle -> zero-extend 8x16 to 16x8");
757 break;
758 case SimdPermuteOp::ZERO_EXTEND_8x16_TO_32x4:
759 js::wasm::ReportSimdAnalysis("shuffle -> zero-extend 8x16 to 32x4");
760 break;
761 case SimdPermuteOp::ZERO_EXTEND_8x16_TO_64x2:
762 js::wasm::ReportSimdAnalysis("shuffle -> zero-extend 8x16 to 64x2");
763 break;
764 case SimdPermuteOp::ZERO_EXTEND_16x8_TO_32x4:
765 js::wasm::ReportSimdAnalysis("shuffle -> zero-extend 16x8 to 32x4");
766 break;
767 case SimdPermuteOp::ZERO_EXTEND_16x8_TO_64x2:
768 js::wasm::ReportSimdAnalysis("shuffle -> zero-extend 16x8 to 64x2");
769 break;
770 case SimdPermuteOp::ZERO_EXTEND_32x4_TO_64x2:
771 js::wasm::ReportSimdAnalysis("shuffle -> zero-extend 32x4 to 64x2");
772 break;
773 default:
774 MOZ_CRASH("Unexpected permute op");
776 break;
778 return s;
780 # endif // DEBUG
782 SimdShuffle jit::AnalyzeSimdShuffle(SimdConstant control, MDefinition* lhs,
783 MDefinition* rhs) {
784 # ifdef DEBUG
785 # define R(s) ReportShuffleSpecialization(s)
786 # else
787 # define R(s) (s)
788 # endif
790 // If only one of the inputs is used, determine which.
791 bool useLeft = true;
792 bool useRight = true;
793 if (lhs == rhs) {
794 useRight = false;
795 } else {
796 bool allAbove = true;
797 bool allBelow = true;
798 const SimdConstant::I8x16& lanes = control.asInt8x16();
799 for (int8_t i : lanes) {
800 allAbove = allAbove && i >= 16;
801 allBelow = allBelow && i < 16;
803 if (allAbove) {
804 useLeft = false;
805 } else if (allBelow) {
806 useRight = false;
810 // Deal with one-ignored-input.
811 if (!(useLeft && useRight)) {
812 SimdPermuteOp op = AnalyzePermute(&control);
813 return R(SimdShuffle::permute(
814 useLeft ? SimdShuffle::Operand::LEFT : SimdShuffle::Operand::RIGHT,
815 control, op));
818 // Move constants to rhs.
819 bool swapOperands = MaybeReorderShuffleOperands(&lhs, &rhs, &control);
821 // Deal with constant rhs.
822 if (rhs->isWasmFloatConstant()) {
823 SimdConstant rhsConstant = rhs->toWasmFloatConstant()->toSimd128();
824 if (rhsConstant.isZeroBits()) {
825 Maybe<SimdPermuteOp> op = AnalyzeShuffleWithZero(&control);
826 if (op) {
827 return R(SimdShuffle::permute(swapOperands ? SimdShuffle::Operand::RIGHT
828 : SimdShuffle::Operand::LEFT,
829 control, *op));
834 // Two operands both of which are used. If there's one constant operand it is
835 // now on the rhs.
836 SimdShuffleOp op = AnalyzeTwoArgShuffle(&control, &swapOperands);
837 return R(SimdShuffle::shuffle(swapOperands
838 ? SimdShuffle::Operand::BOTH_SWAPPED
839 : SimdShuffle::Operand::BOTH,
840 control, op));
841 # undef R
844 #endif // ENABLE_WASM_SIMD