js/src/jit/ShuffleAnalysis.cpp

   1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* This Source Code Form is subject to the terms of the Mozilla Public
   3  * License, v. 2.0. If a copy of the MPL was not distributed with this
   4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   5
   6 #include "jit/ShuffleAnalysis.h"
   7 #include "mozilla/MathAlgorithms.h"
   8 #include "jit/MIR.h"
   9 #include "wasm/WasmFeatures.h"
  10
  11 using namespace js;
  12 using namespace jit;
  13
  14 using mozilla::Maybe;
  15 using mozilla::Nothing;
  16 using mozilla::Some;
  17
  18 #ifdef ENABLE_WASM_SIMD
  19
  20 // Specialization analysis for SIMD operations.  This is still x86-centric but
  21 // generalizes fairly easily to other architectures.
  22
  23 // Optimization of v8x16.shuffle.  The general byte shuffle+blend is very
  24 // expensive (equivalent to at least a dozen instructions), and we want to avoid
  25 // that if we can.  So look for special cases - there are many.
  26 //
  27 // The strategy is to sort the operation into one of three buckets depending
  28 // on the shuffle pattern and inputs:
  29 //
  30 //  - single operand; shuffles on these values are rotations, reversals,
  31 //    transpositions, and general permutations
  32 //  - single-operand-with-interesting-constant (especially zero); shuffles on
  33 //    these values are often byte shift or scatter operations
  34 //  - dual operand; shuffles on these operations are blends, catenated
  35 //    shifts, and (in the worst case) general shuffle+blends
  36 //
  37 // We're not trying to solve the general problem, only to lower reasonably
  38 // expressed patterns that express common operations.  Producers that produce
  39 // dense and convoluted patterns will end up with the general byte shuffle.
  40 // Producers that produce simpler patterns that easily map to hardware will
  41 // get faster code.
  42 //
  43 // In particular, these matchers do not try to combine transformations, so a
  44 // shuffle that optimally is lowered to rotate + permute32x4 + rotate, say, is
  45 // usually going to end up as a general byte shuffle.
  46
  47 // Reduce a 0..31 byte mask to a 0..15 word mask if possible and if so return
  48 // true, updating *control.
  49 static bool ByteMaskToWordMask(SimdConstant* control) {
  50   const SimdConstant::I8x16& lanes = control->asInt8x16();
  51   int16_t controlWords[8];
  52   for (int i = 0; i < 16; i += 2) {
  53     if (!((lanes[i] & 1) == 0 && lanes[i + 1] == lanes[i] + 1)) {
  54       return false;
  55     }
  56     controlWords[i / 2] = int16_t(lanes[i] / 2);
  57   }
  58   *control = SimdConstant::CreateX8(controlWords);
  59   return true;
  60 }
  61
  62 // Reduce a 0..31 byte mask to a 0..7 dword mask if possible and if so return
  63 // true, updating *control.
  64 static bool ByteMaskToDWordMask(SimdConstant* control) {
  65   const SimdConstant::I8x16& lanes = control->asInt8x16();
  66   int32_t controlDWords[4];
  67   for (int i = 0; i < 16; i += 4) {
  68     if (!((lanes[i] & 3) == 0 && lanes[i + 1] == lanes[i] + 1 &&
  69           lanes[i + 2] == lanes[i] + 2 && lanes[i + 3] == lanes[i] + 3)) {
  70       return false;
  71     }
  72     controlDWords[i / 4] = lanes[i] / 4;
  73   }
  74   *control = SimdConstant::CreateX4(controlDWords);
  75   return true;
  76 }
  77
  78 // Reduce a 0..31 byte mask to a 0..3 qword mask if possible and if so return
  79 // true, updating *control.
  80 static bool ByteMaskToQWordMask(SimdConstant* control) {
  81   const SimdConstant::I8x16& lanes = control->asInt8x16();
  82   int64_t controlQWords[2];
  83   for (int i = 0; i < 16; i += 8) {
  84     if (!((lanes[i] & 7) == 0 && lanes[i + 1] == lanes[i] + 1 &&
  85           lanes[i + 2] == lanes[i] + 2 && lanes[i + 3] == lanes[i] + 3 &&
  86           lanes[i + 4] == lanes[i] + 4 && lanes[i + 5] == lanes[i] + 5 &&
  87           lanes[i + 6] == lanes[i] + 6 && lanes[i + 7] == lanes[i] + 7)) {
  88       return false;
  89     }
  90     controlQWords[i / 8] = lanes[i] / 8;
  91   }
  92   *control = SimdConstant::CreateX2(controlQWords);
  93   return true;
  94 }
  95
  96 // Skip across consecutive values in lanes starting at i, returning the index
  97 // after the last element.  Lane values must be <= len-1 ("masked").
  98 //
  99 // Since every element is a 1-element run, the return value is never the same as
 100 // the starting i.
 101 template <typename T>
 102 static int ScanIncreasingMasked(const T* lanes, int i) {
 103   int len = int(16 / sizeof(T));
 104   MOZ_ASSERT(i < len);
 105   MOZ_ASSERT(lanes[i] <= len - 1);
 106   i++;
 107   while (i < len && lanes[i] == lanes[i - 1] + 1) {
 108     MOZ_ASSERT(lanes[i] <= len - 1);
 109     i++;
 110   }
 111   return i;
 112 }
 113
 114 // Skip across consecutive values in lanes starting at i, returning the index
 115 // after the last element.  Lane values must be <= len*2-1 ("unmasked"); the
 116 // values len-1 and len are not considered consecutive.
 117 //
 118 // Since every element is a 1-element run, the return value is never the same as
 119 // the starting i.
 120 template <typename T>
 121 static int ScanIncreasingUnmasked(const T* lanes, int i) {
 122   int len = int(16 / sizeof(T));
 123   MOZ_ASSERT(i < len);
 124   if (lanes[i] < len) {
 125     i++;
 126     while (i < len && lanes[i] < len && lanes[i - 1] == lanes[i] - 1) {
 127       i++;
 128     }
 129   } else {
 130     i++;
 131     while (i < len && lanes[i] >= len && lanes[i - 1] == lanes[i] - 1) {
 132       i++;
 133     }
 134   }
 135   return i;
 136 }
 137
 138 // Skip lanes that equal v starting at i, returning the index just beyond the
 139 // last of those.  There is no requirement that the initial lanes[i] == v.
 140 template <typename T>
 141 static int ScanConstant(const T* lanes, int v, int i) {
 142   int len = int(16 / sizeof(T));
 143   MOZ_ASSERT(i <= len);
 144   while (i < len && lanes[i] == v) {
 145     i++;
 146   }
 147   return i;
 148 }
 149
 150 // Mask lane values denoting rhs elements into lhs elements.
 151 template <typename T>
 152 static void MaskLanes(T* result, const T* input) {
 153   int len = int(16 / sizeof(T));
 154   for (int i = 0; i < len; i++) {
 155     result[i] = input[i] & (len - 1);
 156   }
 157 }
 158
 159 // Apply a transformation to each lane value.
 160 template <typename T>
 161 static void MapLanes(T* result, const T* input, int (*f)(int)) {
 162   // Hazard analysis trips on "IndirectCall: f" error.
 163   // Suppress the check -- `f` is expected to be trivial here.
 164   JS::AutoSuppressGCAnalysis nogc;
 165
 166   int len = int(16 / sizeof(T));
 167   for (int i = 0; i < len; i++) {
 168     result[i] = f(input[i]);
 169   }
 170 }
 171
 172 // Recognize an identity permutation, assuming lanes is masked.
 173 template <typename T>
 174 static bool IsIdentity(const T* lanes) {
 175   return ScanIncreasingMasked(lanes, 0) == int(16 / sizeof(T));
 176 }
 177
 178 // Recognize part of an identity permutation starting at start, with
 179 // the first value of the permutation expected to be bias.
 180 template <typename T>
 181 static bool IsIdentity(const T* lanes, int start, int len, int bias) {
 182   if (lanes[start] != bias) {
 183     return false;
 184   }
 185   for (int i = start + 1; i < start + len; i++) {
 186     if (lanes[i] != lanes[i - 1] + 1) {
 187       return false;
 188     }
 189   }
 190   return true;
 191 }
 192
 193 // We can permute by dwords if the mask is reducible to a dword mask, and in
 194 // this case a single PSHUFD is enough.
 195 static bool TryPermute32x4(SimdConstant* control) {
 196   SimdConstant tmp = *control;
 197   if (!ByteMaskToDWordMask(&tmp)) {
 198     return false;
 199   }
 200   *control = tmp;
 201   return true;
 202 }
 203
 204 // Can we perform a byte rotate right?  We can use PALIGNR.  The shift count is
 205 // just lanes[0], and *control is unchanged.
 206 static bool TryRotateRight8x16(SimdConstant* control) {
 207   const SimdConstant::I8x16& lanes = control->asInt8x16();
 208   // Look for the end of the first run of consecutive bytes.
 209   int i = ScanIncreasingMasked(lanes, 0);
 210
 211   // First run must start at a value s.t. we have a rotate if all remaining
 212   // bytes are a run.
 213   if (lanes[0] != 16 - i) {
 214     return false;
 215   }
 216
 217   // If we reached the end of the vector, we're done.
 218   if (i == 16) {
 219     return true;
 220   }
 221
 222   // Second run must start at source lane zero.
 223   if (lanes[i] != 0) {
 224     return false;
 225   }
 226
 227   // Second run must end at the end of the lane vector.
 228   return ScanIncreasingMasked(lanes, i) == 16;
 229 }
 230
 231 // We can permute by words if the mask is reducible to a word mask.
 232 static bool TryPermute16x8(SimdConstant* control) {
 233   SimdConstant tmp = *control;
 234   if (!ByteMaskToWordMask(&tmp)) {
 235     return false;
 236   }
 237   *control = tmp;
 238   return true;
 239 }
 240
 241 // A single word lane is copied into all the other lanes: PSHUF*W + PSHUFD.
 242 static bool TryBroadcast16x8(SimdConstant* control) {
 243   SimdConstant tmp = *control;
 244   if (!ByteMaskToWordMask(&tmp)) {
 245     return false;
 246   }
 247   const SimdConstant::I16x8& lanes = tmp.asInt16x8();
 248   if (ScanConstant(lanes, lanes[0], 0) < 8) {
 249     return false;
 250   }
 251   *control = tmp;
 252   return true;
 253 }
 254
 255 // A single byte lane is copied int all the other lanes: PUNPCK*BW + PSHUF*W +
 256 // PSHUFD.
 257 static bool TryBroadcast8x16(SimdConstant* control) {
 258   const SimdConstant::I8x16& lanes = control->asInt8x16();
 259   return ScanConstant(lanes, lanes[0], 0) >= 16;
 260 }
 261
 262 template <int N>
 263 static bool TryReverse(SimdConstant* control) {
 264   const SimdConstant::I8x16& lanes = control->asInt8x16();
 265   for (int i = 0; i < 16; i++) {
 266     if (lanes[i] != (i ^ (N - 1))) {
 267       return false;
 268     }
 269   }
 270   return true;
 271 }
 272
 273 // Look for permutations of a single operand.
 274 static SimdPermuteOp AnalyzePermute(SimdConstant* control) {
 275   // Lane indices are input-agnostic for single-operand permutations.
 276   SimdConstant::I8x16 controlBytes;
 277   MaskLanes(controlBytes, control->asInt8x16());
 278
 279   // Get rid of no-ops immediately, so nobody else needs to check.
 280   if (IsIdentity(controlBytes)) {
 281     return SimdPermuteOp::MOVE;
 282   }
 283
 284   // Default control is the masked bytes.
 285   *control = SimdConstant::CreateX16(controlBytes);
 286
 287   // Analysis order matters here and is architecture-dependent or even
 288   // microarchitecture-dependent: ideally the cheapest implementation first.
 289   // The Intel manual says that the cost of a PSHUFB is about five other
 290   // operations, so make that our cutoff.
 291   //
 292   // Word, dword, and qword reversals are handled optimally by general permutes.
 293   //
 294   // Byte reversals are probably best left to PSHUFB, no alternative rendition
 295   // seems to reliably go below five instructions.  (Discuss.)
 296   //
 297   // Word swaps within doublewords and dword swaps within quadwords are handled
 298   // optimally by general permutes.
 299   //
 300   // Dword and qword broadcasts are handled by dword permute.
 301
 302   if (TryPermute32x4(control)) {
 303     return SimdPermuteOp::PERMUTE_32x4;
 304   }
 305   if (TryRotateRight8x16(control)) {
 306     return SimdPermuteOp::ROTATE_RIGHT_8x16;
 307   }
 308   if (TryBroadcast16x8(control)) {
 309     return SimdPermuteOp::BROADCAST_16x8;
 310   }
 311   if (TryPermute16x8(control)) {
 312     return SimdPermuteOp::PERMUTE_16x8;
 313   }
 314   if (TryBroadcast8x16(control)) {
 315     return SimdPermuteOp::BROADCAST_8x16;
 316   }
 317   if (TryReverse<2>(control)) {
 318     return SimdPermuteOp::REVERSE_16x8;
 319   }
 320   if (TryReverse<4>(control)) {
 321     return SimdPermuteOp::REVERSE_32x4;
 322   }
 323   if (TryReverse<8>(control)) {
 324     return SimdPermuteOp::REVERSE_64x2;
 325   }
 326
 327   // TODO: (From v8) Unzip and transpose generally have renditions that slightly
 328   // beat a general permute (three or four instructions)
 329   //
 330   // TODO: (From MacroAssemblerX86Shared::ShuffleX4): MOVLHPS and MOVHLPS can be
 331   // used when merging two values.
 332
 333   // The default operation is to permute bytes with the default control.
 334   return SimdPermuteOp::PERMUTE_8x16;
 335 }
 336
 337 // Can we shift the bytes left or right by a constant?  A shift is a run of
 338 // lanes from the rhs (which is zero) on one end and a run of values from the
 339 // lhs on the other end.
 340 static Maybe<SimdPermuteOp> TryShift8x16(SimdConstant* control) {
 341   const SimdConstant::I8x16& lanes = control->asInt8x16();
 342
 343   // Represent all zero lanes by 16
 344   SimdConstant::I8x16 zeroesMasked;
 345   MapLanes(zeroesMasked, lanes, [](int x) -> int { return x >= 16 ? 16 : x; });
 346
 347   int i = ScanConstant(zeroesMasked, 16, 0);
 348   int shiftLeft = i;
 349   if (shiftLeft > 0 && lanes[shiftLeft] != 0) {
 350     return Nothing();
 351   }
 352
 353   i = ScanIncreasingUnmasked(zeroesMasked, i);
 354   int shiftRight = 16 - i;
 355   if (shiftRight > 0 && lanes[i - 1] != 15) {
 356     return Nothing();
 357   }
 358
 359   i = ScanConstant(zeroesMasked, 16, i);
 360   if (i < 16 || (shiftRight > 0 && shiftLeft > 0) ||
 361       (shiftRight == 0 && shiftLeft == 0)) {
 362     return Nothing();
 363   }
 364
 365   if (shiftRight) {
 366     *control = SimdConstant::SplatX16((int8_t)shiftRight);
 367     return Some(SimdPermuteOp::SHIFT_RIGHT_8x16);
 368   }
 369   *control = SimdConstant::SplatX16((int8_t)shiftLeft);
 370   return Some(SimdPermuteOp::SHIFT_LEFT_8x16);
 371 }
 372
 373 // Check if it is unsigned integer extend operation.
 374 static Maybe<SimdPermuteOp> TryZeroExtend(SimdConstant* control) {
 375   const SimdConstant::I8x16& lanes = control->asInt8x16();
 376
 377   // Find fragment of sequantial lanes indices that starts from 0.
 378   uint32_t i = 0;
 379   for (; i <= 4 && lanes[i] == int8_t(i); i++) {
 380   }
 381   // The length of the fragment has to be a power of 2, and next item is zero.
 382   if (!mozilla::IsPowerOfTwo(i) || lanes[i] < 16) {
 383     return Nothing();
 384   }
 385   MOZ_ASSERT(i > 0 && i <= 4);
 386   uint32_t fromLen = i;
 387   // Skip items that will be zero'ed.
 388   for (; i <= 8 && lanes[i] >= 16; i++) {
 389   }
 390   // The length of the entire fragment of zero and non-zero items
 391   // needs to be power of 2.
 392   if (!mozilla::IsPowerOfTwo(i)) {
 393     return Nothing();
 394   }
 395   MOZ_ASSERT(i > fromLen && i <= 8);
 396   uint32_t toLen = i;
 397
 398   // The sequence will repeat every toLen elements: in which first
 399   // fromLen items are sequential lane indices, and the rest are zeros.
 400   int8_t current = int8_t(fromLen);
 401   for (; i < 16; i++) {
 402     if ((i % toLen) >= fromLen) {
 403       // Expect the item be a zero.
 404       if (lanes[i] < 16) {
 405         return Nothing();
 406       }
 407     } else {
 408       // Check the item is in ascending sequence.
 409       if (lanes[i] != current) {
 410         return Nothing();
 411       }
 412       current++;
 413     }
 414   }
 415
 416   switch (fromLen) {
 417     case 1:
 418       switch (toLen) {
 419         case 2:
 420           return Some(SimdPermuteOp::ZERO_EXTEND_8x16_TO_16x8);
 421         case 4:
 422           return Some(SimdPermuteOp::ZERO_EXTEND_8x16_TO_32x4);
 423         case 8:
 424           return Some(SimdPermuteOp::ZERO_EXTEND_8x16_TO_64x2);
 425       }
 426       break;
 427     case 2:
 428       switch (toLen) {
 429         case 4:
 430           return Some(SimdPermuteOp::ZERO_EXTEND_16x8_TO_32x4);
 431         case 8:
 432           return Some(SimdPermuteOp::ZERO_EXTEND_16x8_TO_64x2);
 433       }
 434       break;
 435     case 4:
 436       switch (toLen) {
 437         case 8:
 438           return Some(SimdPermuteOp::ZERO_EXTEND_32x4_TO_64x2);
 439       }
 440       break;
 441   }
 442   MOZ_CRASH("Invalid TryZeroExtend match");
 443 }
 444
 445 static Maybe<SimdPermuteOp> AnalyzeShuffleWithZero(SimdConstant* control) {
 446   Maybe<SimdPermuteOp> op;
 447   op = TryShift8x16(control);
 448   if (op) {
 449     return op;
 450   }
 451
 452   op = TryZeroExtend(control);
 453   if (op) {
 454     return op;
 455   }
 456
 457   // TODO: Optimization opportunity? A byte-blend-with-zero is just a CONST;
 458   // PAND.  This may beat the general byte blend code below.
 459   return Nothing();
 460 }
 461
 462 // Concat: if the result is the suffix (high bytes) of the rhs in front of a
 463 // prefix (low bytes) of the lhs then this is PALIGNR; ditto if the operands are
 464 // swapped.
 465 static Maybe<SimdShuffleOp> TryConcatRightShift8x16(SimdConstant* control,
 466                                                     bool* swapOperands) {
 467   const SimdConstant::I8x16& lanes = control->asInt8x16();
 468   int i = ScanIncreasingUnmasked(lanes, 0);
 469   MOZ_ASSERT(i < 16, "Single-operand run should have been handled elswhere");
 470   // First run must end with 15 % 16
 471   if ((lanes[i - 1] & 15) != 15) {
 472     return Nothing();
 473   }
 474   // Second run must start with 0 % 16
 475   if ((lanes[i] & 15) != 0) {
 476     return Nothing();
 477   }
 478   // The two runs must come from different inputs
 479   if ((lanes[i] & 16) == (lanes[i - 1] & 16)) {
 480     return Nothing();
 481   }
 482   int suffixLength = i;
 483
 484   i = ScanIncreasingUnmasked(lanes, i);
 485   // Must end at the left end
 486   if (i != 16) {
 487     return Nothing();
 488   }
 489
 490   // If the suffix is from the lhs then swap the operands
 491   if (lanes[0] < 16) {
 492     *swapOperands = !*swapOperands;
 493   }
 494   *control = SimdConstant::SplatX16((int8_t)suffixLength);
 495   return Some(SimdShuffleOp::CONCAT_RIGHT_SHIFT_8x16);
 496 }
 497
 498 // Blend words: if we pick words from both operands without a pattern but all
 499 // the input words stay in their position then this is PBLENDW (immediate mask);
 500 // this also handles all larger sizes on x64.
 501 static Maybe<SimdShuffleOp> TryBlendInt16x8(SimdConstant* control) {
 502   SimdConstant tmp(*control);
 503   if (!ByteMaskToWordMask(&tmp)) {
 504     return Nothing();
 505   }
 506   SimdConstant::I16x8 masked;
 507   MaskLanes(masked, tmp.asInt16x8());
 508   if (!IsIdentity(masked)) {
 509     return Nothing();
 510   }
 511   SimdConstant::I16x8 mapped;
 512   MapLanes(mapped, tmp.asInt16x8(),
 513            [](int x) -> int { return x < 8 ? 0 : -1; });
 514   *control = SimdConstant::CreateX8(mapped);
 515   return Some(SimdShuffleOp::BLEND_16x8);
 516 }
 517
 518 // Blend bytes: if we pick bytes ditto then this is a byte blend, which can be
 519 // handled with a CONST, PAND, PANDNOT, and POR.
 520 //
 521 // TODO: Optimization opportunity? If we pick all but one lanes from one with at
 522 // most one from the other then it could be a MOV + PEXRB + PINSRB (also if this
 523 // element is not in its source location).
 524 static Maybe<SimdShuffleOp> TryBlendInt8x16(SimdConstant* control) {
 525   SimdConstant::I8x16 masked;
 526   MaskLanes(masked, control->asInt8x16());
 527   if (!IsIdentity(masked)) {
 528     return Nothing();
 529   }
 530   SimdConstant::I8x16 mapped;
 531   MapLanes(mapped, control->asInt8x16(),
 532            [](int x) -> int { return x < 16 ? 0 : -1; });
 533   *control = SimdConstant::CreateX16(mapped);
 534   return Some(SimdShuffleOp::BLEND_8x16);
 535 }
 536
 537 template <typename T>
 538 static bool MatchInterleave(const T* lanes, int lhs, int rhs, int len) {
 539   for (int i = 0; i < len; i++) {
 540     if (lanes[i * 2] != lhs + i || lanes[i * 2 + 1] != rhs + i) {
 541       return false;
 542     }
 543   }
 544   return true;
 545 }
 546
 547 // Unpack/interleave:
 548 //  - if we interleave the low (bytes/words/doublewords) of the inputs into
 549 //    the output then this is UNPCKL*W (possibly with a swap of operands).
 550 //  - if we interleave the high ditto then it is UNPCKH*W (ditto)
 551 template <typename T>
 552 static Maybe<SimdShuffleOp> TryInterleave(const T* lanes, int lhs, int rhs,
 553                                           bool* swapOperands,
 554                                           SimdShuffleOp lowOp,
 555                                           SimdShuffleOp highOp) {
 556   int len = int(32 / (sizeof(T) * 4));
 557   if (MatchInterleave(lanes, lhs, rhs, len)) {
 558     return Some(lowOp);
 559   }
 560   if (MatchInterleave(lanes, rhs, lhs, len)) {
 561     *swapOperands = !*swapOperands;
 562     return Some(lowOp);
 563   }
 564   if (MatchInterleave(lanes, lhs + len, rhs + len, len)) {
 565     return Some(highOp);
 566   }
 567   if (MatchInterleave(lanes, rhs + len, lhs + len, len)) {
 568     *swapOperands = !*swapOperands;
 569     return Some(highOp);
 570   }
 571   return Nothing();
 572 }
 573
 574 static Maybe<SimdShuffleOp> TryInterleave64x2(SimdConstant* control,
 575                                               bool* swapOperands) {
 576   SimdConstant tmp = *control;
 577   if (!ByteMaskToQWordMask(&tmp)) {
 578     return Nothing();
 579   }
 580   const SimdConstant::I64x2& lanes = tmp.asInt64x2();
 581   return TryInterleave(lanes, 0, 2, swapOperands,
 582                        SimdShuffleOp::INTERLEAVE_LOW_64x2,
 583                        SimdShuffleOp::INTERLEAVE_HIGH_64x2);
 584 }
 585
 586 static Maybe<SimdShuffleOp> TryInterleave32x4(SimdConstant* control,
 587                                               bool* swapOperands) {
 588   SimdConstant tmp = *control;
 589   if (!ByteMaskToDWordMask(&tmp)) {
 590     return Nothing();
 591   }
 592   const SimdConstant::I32x4& lanes = tmp.asInt32x4();
 593   return TryInterleave(lanes, 0, 4, swapOperands,
 594                        SimdShuffleOp::INTERLEAVE_LOW_32x4,
 595                        SimdShuffleOp::INTERLEAVE_HIGH_32x4);
 596 }
 597
 598 static Maybe<SimdShuffleOp> TryInterleave16x8(SimdConstant* control,
 599                                               bool* swapOperands) {
 600   SimdConstant tmp = *control;
 601   if (!ByteMaskToWordMask(&tmp)) {
 602     return Nothing();
 603   }
 604   const SimdConstant::I16x8& lanes = tmp.asInt16x8();
 605   return TryInterleave(lanes, 0, 8, swapOperands,
 606                        SimdShuffleOp::INTERLEAVE_LOW_16x8,
 607                        SimdShuffleOp::INTERLEAVE_HIGH_16x8);
 608 }
 609
 610 static Maybe<SimdShuffleOp> TryInterleave8x16(SimdConstant* control,
 611                                               bool* swapOperands) {
 612   const SimdConstant::I8x16& lanes = control->asInt8x16();
 613   return TryInterleave(lanes, 0, 16, swapOperands,
 614                        SimdShuffleOp::INTERLEAVE_LOW_8x16,
 615                        SimdShuffleOp::INTERLEAVE_HIGH_8x16);
 616 }
 617
 618 static SimdShuffleOp AnalyzeTwoArgShuffle(SimdConstant* control,
 619                                           bool* swapOperands) {
 620   Maybe<SimdShuffleOp> op;
 621   op = TryConcatRightShift8x16(control, swapOperands);
 622   if (!op) {
 623     op = TryBlendInt16x8(control);
 624   }
 625   if (!op) {
 626     op = TryBlendInt8x16(control);
 627   }
 628   if (!op) {
 629     op = TryInterleave64x2(control, swapOperands);
 630   }
 631   if (!op) {
 632     op = TryInterleave32x4(control, swapOperands);
 633   }
 634   if (!op) {
 635     op = TryInterleave16x8(control, swapOperands);
 636   }
 637   if (!op) {
 638     op = TryInterleave8x16(control, swapOperands);
 639   }
 640   if (!op) {
 641     op = Some(SimdShuffleOp::SHUFFLE_BLEND_8x16);
 642   }
 643   return *op;
 644 }
 645
 646 // Reorder the operands if that seems useful, notably, move a constant to the
 647 // right hand side.  Rewrites the control to account for any move.
 648 static bool MaybeReorderShuffleOperands(MDefinition** lhs, MDefinition** rhs,
 649                                         SimdConstant* control) {
 650   if ((*lhs)->isWasmFloatConstant()) {
 651     MDefinition* tmp = *lhs;
 652     *lhs = *rhs;
 653     *rhs = tmp;
 654
 655     int8_t controlBytes[16];
 656     const SimdConstant::I8x16& lanes = control->asInt8x16();
 657     for (unsigned i = 0; i < 16; i++) {
 658       controlBytes[i] = int8_t(lanes[i] ^ 16);
 659     }
 660     *control = SimdConstant::CreateX16(controlBytes);
 661
 662     return true;
 663   }
 664   return false;
 665 }
 666
 667 #  ifdef DEBUG
 668 static const SimdShuffle& ReportShuffleSpecialization(const SimdShuffle& s) {
 669   switch (s.opd) {
 670     case SimdShuffle::Operand::BOTH:
 671     case SimdShuffle::Operand::BOTH_SWAPPED:
 672       switch (*s.shuffleOp) {
 673         case SimdShuffleOp::SHUFFLE_BLEND_8x16:
 674           js::wasm::ReportSimdAnalysis("shuffle -> shuffle+blend 8x16");
 675           break;
 676         case SimdShuffleOp::BLEND_8x16:
 677           js::wasm::ReportSimdAnalysis("shuffle -> blend 8x16");
 678           break;
 679         case SimdShuffleOp::BLEND_16x8:
 680           js::wasm::ReportSimdAnalysis("shuffle -> blend 16x8");
 681           break;
 682         case SimdShuffleOp::CONCAT_RIGHT_SHIFT_8x16:
 683           js::wasm::ReportSimdAnalysis("shuffle -> concat+shift-right 8x16");
 684           break;
 685         case SimdShuffleOp::INTERLEAVE_HIGH_8x16:
 686           js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 8x16");
 687           break;
 688         case SimdShuffleOp::INTERLEAVE_HIGH_16x8:
 689           js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 16x8");
 690           break;
 691         case SimdShuffleOp::INTERLEAVE_HIGH_32x4:
 692           js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 32x4");
 693           break;
 694         case SimdShuffleOp::INTERLEAVE_HIGH_64x2:
 695           js::wasm::ReportSimdAnalysis("shuffle -> interleave-high 64x2");
 696           break;
 697         case SimdShuffleOp::INTERLEAVE_LOW_8x16:
 698           js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 8x16");
 699           break;
 700         case SimdShuffleOp::INTERLEAVE_LOW_16x8:
 701           js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 16x8");
 702           break;
 703         case SimdShuffleOp::INTERLEAVE_LOW_32x4:
 704           js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 32x4");
 705           break;
 706         case SimdShuffleOp::INTERLEAVE_LOW_64x2:
 707           js::wasm::ReportSimdAnalysis("shuffle -> interleave-low 64x2");
 708           break;
 709         default:
 710           MOZ_CRASH("Unexpected shuffle op");
 711       }
 712       break;
 713     case SimdShuffle::Operand::LEFT:
 714     case SimdShuffle::Operand::RIGHT:
 715       switch (*s.permuteOp) {
 716         case SimdPermuteOp::BROADCAST_8x16:
 717           js::wasm::ReportSimdAnalysis("shuffle -> broadcast 8x16");
 718           break;
 719         case SimdPermuteOp::BROADCAST_16x8:
 720           js::wasm::ReportSimdAnalysis("shuffle -> broadcast 16x8");
 721           break;
 722         case SimdPermuteOp::MOVE:
 723           js::wasm::ReportSimdAnalysis("shuffle -> move");
 724           break;
 725         case SimdPermuteOp::REVERSE_16x8:
 726           js::wasm::ReportSimdAnalysis(
 727               "shuffle -> reverse bytes in 16-bit lanes");
 728           break;
 729         case SimdPermuteOp::REVERSE_32x4:
 730           js::wasm::ReportSimdAnalysis(
 731               "shuffle -> reverse bytes in 32-bit lanes");
 732           break;
 733         case SimdPermuteOp::REVERSE_64x2:
 734           js::wasm::ReportSimdAnalysis(
 735               "shuffle -> reverse bytes in 64-bit lanes");
 736           break;
 737         case SimdPermuteOp::PERMUTE_8x16:
 738           js::wasm::ReportSimdAnalysis("shuffle -> permute 8x16");
 739           break;
 740         case SimdPermuteOp::PERMUTE_16x8:
 741           js::wasm::ReportSimdAnalysis("shuffle -> permute 16x8");
 742           break;
 743         case SimdPermuteOp::PERMUTE_32x4:
 744           js::wasm::ReportSimdAnalysis("shuffle -> permute 32x4");
 745           break;
 746         case SimdPermuteOp::ROTATE_RIGHT_8x16:
 747           js::wasm::ReportSimdAnalysis("shuffle -> rotate-right 8x16");
 748           break;
 749         case SimdPermuteOp::SHIFT_LEFT_8x16:
 750           js::wasm::ReportSimdAnalysis("shuffle -> shift-left 8x16");
 751           break;
 752         case SimdPermuteOp::SHIFT_RIGHT_8x16:
 753           js::wasm::ReportSimdAnalysis("shuffle -> shift-right 8x16");
 754           break;
 755         case SimdPermuteOp::ZERO_EXTEND_8x16_TO_16x8:
 756           js::wasm::ReportSimdAnalysis("shuffle -> zero-extend 8x16 to 16x8");
 757           break;
 758         case SimdPermuteOp::ZERO_EXTEND_8x16_TO_32x4:
 759           js::wasm::ReportSimdAnalysis("shuffle -> zero-extend 8x16 to 32x4");
 760           break;
 761         case SimdPermuteOp::ZERO_EXTEND_8x16_TO_64x2:
 762           js::wasm::ReportSimdAnalysis("shuffle -> zero-extend 8x16 to 64x2");
 763           break;
 764         case SimdPermuteOp::ZERO_EXTEND_16x8_TO_32x4:
 765           js::wasm::ReportSimdAnalysis("shuffle -> zero-extend 16x8 to 32x4");
 766           break;
 767         case SimdPermuteOp::ZERO_EXTEND_16x8_TO_64x2:
 768           js::wasm::ReportSimdAnalysis("shuffle -> zero-extend 16x8 to 64x2");
 769           break;
 770         case SimdPermuteOp::ZERO_EXTEND_32x4_TO_64x2:
 771           js::wasm::ReportSimdAnalysis("shuffle -> zero-extend 32x4 to 64x2");
 772           break;
 773         default:
 774           MOZ_CRASH("Unexpected permute op");
 775       }
 776       break;
 777   }
 778   return s;
 779 }
 780 #  endif  // DEBUG
 781
 782 SimdShuffle jit::AnalyzeSimdShuffle(SimdConstant control, MDefinition* lhs,
 783                                     MDefinition* rhs) {
 784 #  ifdef DEBUG
 785 #    define R(s) ReportShuffleSpecialization(s)
 786 #  else
 787 #    define R(s) (s)
 788 #  endif
 789
 790   // If only one of the inputs is used, determine which.
 791   bool useLeft = true;
 792   bool useRight = true;
 793   if (lhs == rhs) {
 794     useRight = false;
 795   } else {
 796     bool allAbove = true;
 797     bool allBelow = true;
 798     const SimdConstant::I8x16& lanes = control.asInt8x16();
 799     for (int8_t i : lanes) {
 800       allAbove = allAbove && i >= 16;
 801       allBelow = allBelow && i < 16;
 802     }
 803     if (allAbove) {
 804       useLeft = false;
 805     } else if (allBelow) {
 806       useRight = false;
 807     }
 808   }
 809
 810   // Deal with one-ignored-input.
 811   if (!(useLeft && useRight)) {
 812     SimdPermuteOp op = AnalyzePermute(&control);
 813     return R(SimdShuffle::permute(
 814         useLeft ? SimdShuffle::Operand::LEFT : SimdShuffle::Operand::RIGHT,
 815         control, op));
 816   }
 817
 818   // Move constants to rhs.
 819   bool swapOperands = MaybeReorderShuffleOperands(&lhs, &rhs, &control);
 820
 821   // Deal with constant rhs.
 822   if (rhs->isWasmFloatConstant()) {
 823     SimdConstant rhsConstant = rhs->toWasmFloatConstant()->toSimd128();
 824     if (rhsConstant.isZeroBits()) {
 825       Maybe<SimdPermuteOp> op = AnalyzeShuffleWithZero(&control);
 826       if (op) {
 827         return R(SimdShuffle::permute(swapOperands ? SimdShuffle::Operand::RIGHT
 828                                                    : SimdShuffle::Operand::LEFT,
 829                                       control, *op));
 830       }
 831     }
 832   }
 833
 834   // Two operands both of which are used.  If there's one constant operand it is
 835   // now on the rhs.
 836   SimdShuffleOp op = AnalyzeTwoArgShuffle(&control, &swapOperands);
 837   return R(SimdShuffle::shuffle(swapOperands
 838                                     ? SimdShuffle::Operand::BOTH_SWAPPED
 839                                     : SimdShuffle::Operand::BOTH,
 840                                 control, op));
 841 #  undef R
 842 }
 843
 844 #endif  // ENABLE_WASM_SIMD