hphp/runtime/vm/jit/irgen-iter-spec.cpp
/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/

#include "hphp/runtime/vm/jit/normalized-instruction.h"

#include "hphp/runtime/vm/jit/array-iter-profile.h"
#include "hphp/runtime/vm/jit/irgen-exit.h"
#include "hphp/runtime/vm/jit/irgen-control.h"
#include "hphp/runtime/vm/jit/irgen-internal.h"
#include "hphp/runtime/vm/jit/target-profile.h"

#include "hphp/util/struct-log.h"

namespace HPHP { namespace jit { namespace irgen {

//////////////////////////////////////////////////////////////////////

/*
 * Iterator Specialization: an explanation of the madness
 *
 * ========================================================
 * Intro: the generic case
 *
 * Before we describe iterator specialization, let's look at what the IterInit
 * and IterNext bytecodes are supposed to do. Let's assume that the bases are
 * array-likes; the object case is re-entrant and we don't specialize it.
 *
 * Pseudocode for IterInit:
 *
 *  1. Check if the base is empty; branch to done if so.
 *  2. Initialize the fields of the iterator: base, type, pos, end.
 *  3. Load and dec-ref the old val output local (and key, if applicable).
 *  4. Load, inc-ref, and store the new val (and key, if applicable).
 *  5. Continue onwards to the loop entry block.
 *
 * Pseudocode for IterNext:
 *
 *  1. Increment the iterator's pos field.
 *  2. Check if the pos is terminal; branch to done if so.
 *  3. Load and dec-ref the old val output local (and key, if applicable).
 *  4. Load, inc-ref, and store the new val (and key, if applicable).
 *  5. Check surprise flags and branch to the loop entry block.
 *
 * NOTE: It's possible that the old and new values alias (or that they point to
 * some other heap allocated values that alias). However, it's still okay to do
 * step 3 before step 4, because after step 3, any aliased values will still
 * have at least one ref-count held by the base. We'll use this fact later.
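 *
 * To make this concrete, a Hack loop like the following (an illustrative
 * example; the exact bytecodes emitted depend on the loop and the base):
 *
 *   foreach ($arr as $k => $v) { echo $k, $v; }
 *
 * compiles to an iterator-init bytecode at the top of the loop and a matching
 * iterator-next at the bottom, with the key and value output locals (and the
 * base local, for local iterators) described by the bytecodes' IterArgs data.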
 *
 * ========================================================
 * Iter groups: the unit of specialization
 *
 * Examining the pseudocode above, steps 3 and 4 could be reused between *all*
 * IterInit and IterNext bytecodes in a given region that share the same loop
 * entry block. Given a loop entry block in a region, let's call the set of all
 * IterInit and IterNext bytecodes that share that block its "iter group".
 *
 * Some invariants on iter groups enforced at the bytecode level:
 *  - All ops have the same "type" (NonLocal, LocalBaseConst, LocalBaseMutable)
 *  - All ops have the same iterId, valLocalId, and keyLocalId
 *
 * And one thing that's not invariant:
 *  - The "loop exit" blocks for the ops in a group may differ. For example,
 *    it's possible that an IterInit in an iter group branches to a ReqBindJmp
 *    if the base is empty, while the IterNext in that group branches to RetC.
 *
 * In this module, we'll attempt to specialize an entire iter group or leave
 * the entire group generic. We'll store a list of SpecializedIterator structs
 * in IRGS keyed on the entry block, so that we can share certain blocks of
 * code between different ops in the same group.
 *
 * However, we will *not* guarantee that we do successfully specialize all ops
 * in the group. Instead, we'll ensure correctness by doing necessary checks
 * to ensure that the specialized code is valid before jumping into any shared
 * code blocks. We'll rely on load- and store-elim to eliminate these checks in
 * cases where we do specialize an entire iter group.
 *
 * ========================================================
 * Structure of the generated code
 *
 * There are four components in the specialized code for an iter group:
 * inits, the header, nexts, and the footer. Here's how they are arranged:
 *
 *  ----------------------- ----------------------- -----------------------
 *  | Specialized init #1 | | Specialized init #2 | | Specialized init #n |
 *  ----------------------- ----------------------- -----------------------
 *                       \             |            /
 *                         \           |          /
 *                            ----------------------
 *                            | Specialized header |
 *                            ----------------------
 *                                      |
 *                                      |
 *                            ----------------------
 *                            |  Loop entry block  |
 *                            | (not specialized!) |
 *                            ----------------------
 *                                      |
 *                                      |
 *    Loop body; may split control flow so there are multiple IterNexts;
 *    may throw exceptions or check types that lead to side exits, etc.
 *               |                      |                      |
 *               |                      |                      |
 *  ----------------------- ----------------------- -----------------------
 *  | Specialized next #1 | | Specialized next #2 | | Specialized next #n |
 *  ----------------------- ----------------------- -----------------------
 *                       \             |            /
 *                         \           |          /
 *                            ----------------------
 *                            | Specialized footer |
 *                            ----------------------
 *                                      |
 *                                      |
 *                     Jump to specialized header
128 * ========================================================
129 * Details of the generated code
131 * Here's what we do in each of the components above:
133 * a) Inits: one for each IterInit in the group
134 * 1. Check that the base matches the group's specialization type.
135 * 2. Check that the base has non-zero size. If not, branch to done.
136 * (The done block may differ for IterInits in the same group.)
137 * 3. Load and dec-ref the old val output local (and key, if applicable).
138 * 4. Initialize the iter's base, type, and end fields.
139 * 5. Jump to the header, phi-ing in the initial pos.
141 * b) Header: a single one right before the group's loop entry
142 * 1. Load, inc-ref, and store the new val (and key, if applicable)
143 * 2. Continue on to the loop entry block
145 * c) Nexts: one for each IterNext in the group
146 * 1. Check that the iter's type matches the group's specialization type
147 * 2. Increment the iterator's pos field.
148 * 3. Check if the pos is terminal. If it is, branch to done.
149 * (The done block may differ for IterNexts in the same group.)
150 * 4. Jump to the footer, phi-ing in the current pos.
152 * d) Footer: a single one that jumps back to the group's header
153 * 1. Load and dec-ref the old val output local (and key, if applicable).
154 * 2. Check surprise flags and handle the surprise if needed.
155 * 3. Jump to the header, phi-ing in the current pos.
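 *
 * Putting the components above together, here is a rough sketch of the IR we
 * aim for in one simple case: a value-only loop over a vec with a constant
 * base. This is illustrative only; the exact opcodes and operands depend on
 * the chosen specialization (pointer iters vs. index iters) and on profiling.
 *
 *   init:    t0 = CheckType<VanillaVec> base -> side-exit
 *            t1 = CountVec t0
 *            JmpZero done, t1              // empty base: skip the loop
 *            <dec-ref old outputs; StIterBase / StIterType / StIterEnd>
 *            Jmp header, <initial pos>
 *
 *   header:  pos = DefLabel                // phi of the incoming pos values
 *            val = LdVecElem t0, pos
 *            <store val to the output local; IncRef; StIterPos>
 *            Jmp <loop entry block>
 *
 *   next:    CheckIter <specialized type> -> side-exit
 *            pos' = <advance pos>
 *            JmpNZero done, <pos' == end>
 *            Jmp footer, pos'
 *
 *   footer:  pos = DefLabel
 *            <dec-ref old outputs; surprise check>
 *            Jmp header, pos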
157 * ========================================================
158 * How we do irgen
160 * Specializing the same iterator with multiple base types for a given loop
161 * causes performance regressions. Additionally, it's unhelpful to specialize
162 * an IterInit in a given region without reusing the header for an IterNext.
164 * To avoid hitting these pessimal cases, we store a SpecializedIterator struct
165 * in IRGS for each iter group, keyed by loop entry block. (As noted above, we
166 * still generate correct code if there are other bytecodes that jump into the
167 * loop, because our inits and nexts are guarded on checking the base type and
168 * the iter type, respectively.)
170 * SpecializedIterator has four fields: the specialized `iter_type`, a list of
171 * `placeholders`, the shared `header` block, and the shared `footer` block.
173 * When we encounter the first IterInit for a given group, we'll initialize
174 * this struct, choosing `iter_type` based on ArrayIterProfile profiling.
175 * However, we don't know that there's an IterNext in this region yet, so we
176 * emit the code behind a JmpPlaceholder and also generate generic code.
177 * We store these placeholders in the `placeholders` list. We'll generate the
178 * `header` block at this time, but we'll leave the `footer` block null.
180 * When we encounter another IterInit, if profiling suggests that we should use
181 * the same type, we'll again generate specialized code and hide it behind one
182 * of the `placeholders`. However, if profiling suggests that a different type
183 * is better for this IterInit, we'll mark the whole group as despecialized.
185 * When we encounter an IterNext for a given group, if the group still has a
186 * specialized `iter_type`, we'll generate specialized code with that type.
187 * If this next is the first one we've seen in this group, we'll also convert
188 * the placeholders for the specialized inits into jumps and emit the `footer`.
190 * This strategy can produce less performant code if we encounter an IterInit
191 * for a group after encountering an IterNext. For instance, we might generate
192 * a bunch of specialized code for iterating vecs, and then jump into this loop
193 * with a dict. However, this situation is unlikely because of how we form
194 * retranslation chains: if the inits are at the same bytecode offset, they're
195 * likely to be part of a single chain. If this situation occurs, then we'll
196 * still use the specialization if we come in through the earlier inits, but we
197 * may side exit if we come in through the later ones.
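 *
 * For reference, the shape of SpecializedIterator implied by its use below is
 * roughly the following (a sketch only; the authoritative definition lives
 * with the IRGS iterator map, and e.g. the container used for `placeholders`
 * may differ):
 *
 *   struct SpecializedIterator {
 *     IterSpecialization iter_type;              // chosen type, or generic()
 *     std::vector<IRInstruction*> placeholders;  // JmpPlaceholders for inits
 *     Block* header;                             // shared specialized header
 *     Block* footer;                             // shared footer (or null)
 *   };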
 */

//////////////////////////////////////////////////////////////////////

namespace {

//////////////////////////////////////////////////////////////////////
// Basic logging.

auto const s_ArrayIterProfile = makeStaticString("ArrayIterProfile");

void logArrayIterProfile(IRGS& env, const IterArgs& data,
                         folly::Optional<IterSpecialization> iter_type) {
  // We generate code for thousands of loops each time we call retranslateAll.
  // We don't want production webservers to log when they do so.
  if (!StructuredLog::coinflip(RuntimeOption::EvalArrayIterLogRate)) return;

  auto const marker = makeMarker(env, bcOff(env));
  auto const profile = TargetProfile<ArrayIterProfile>(
    env.unit,
    makeMarker(env, bcOff(env)),
    s_ArrayIterProfile
  );
  if (!profile.optimizing()) return;
  if (env.inlineState.conjure) return;

  auto func_str = "(unknown)";
  auto file_str = "(unknown)";
  auto line_int = 0;
  if (marker.hasFunc()) {
    auto const func = marker.func();
    func_str = func->fullName()->data();
    file_str = func->filename()->data();
    line_int = func->unit()->getLineNumber(marker.bcOff());
  }

  std::vector<std::string> inline_state_string;
  std::vector<folly::StringPiece> inline_state;
  for (auto const& state : env.inlineState.bcStateStack) {
    inline_state_string.push_back(show(state));
    inline_state.push_back(inline_state_string.back());
  }

  auto const specialization =
    iter_type ? IterTypeData(0, *iter_type).show() : "(none)";

  StructuredLogEntry entry;
  entry.setStr("marker", marker.show());
  entry.setStr("profile", profile.data().toString());
  entry.setStr("source_func", func_str);
  entry.setStr("source_file", file_str);
  entry.setInt("source_line", line_int);
  entry.setInt("prof_count", curProfCount(env));
  entry.setInt("inline_depth", env.inlineState.depth);
  entry.setVec("inline_state", inline_state);
  entry.setInt("specialized", iter_type ? 1 : 0);
  entry.setStr("specialization", specialization);
  entry.setStr("iter_init_data", IterData(data).show());

  StructuredLog::log("hhvm_array_iterators", entry);
}

//////////////////////////////////////////////////////////////////////
// Simple getters for IterSpecialization.

Type getKeyType(IterSpecialization specialization) {
  switch (specialization.key_types) {
    case IterSpecialization::ArrayKey:  return TInt | TStr;
    case IterSpecialization::Int:       return TInt;
    case IterSpecialization::Str:       return TStr;
    case IterSpecialization::StaticStr: return TStaticStr;
  }
  always_assert(false);
}

ArrayIterProfile::Result getProfileResult(IRGS& env, const SSATmp* base) {
  auto const generic = ArrayIterProfile::Result{};
  assertx(!generic.top_specialization.specialized);

  auto const profile = TargetProfile<ArrayIterProfile>(
    env.unit,
    makeMarker(env, bcOff(env)),
    s_ArrayIterProfile
  );
  if (!profile.optimizing()) return generic;
  auto const result = profile.data().result();
  if (result.num_arrays == 0) return generic;

  auto const a = size_t{result.num_arrays};
  auto const i = size_t{result.num_iterations};
  auto const p = size_t{curProfCount(env)};
  auto const t = size_t{result.top_count};
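  // Worked example of the thresholds below (illustrative numbers): with
  // a = 100 profiled arrays, i = 10,000 total iterations, and a profile count
  // p = 50, the weighted count p * (a + i) / a is 50 * 101 = 5050, so we
  // specialize only if 5050 exceeds EvalArrayIterSpecializationCount and if
  // the top specialization's share t / a (e.g. 95 / 100 = 95%) exceeds
  // EvalArrayIterSpecializationRate.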
  auto const specialize =
    1.0 * p * (a + i) / a > RuntimeOption::EvalArrayIterSpecializationCount &&
    1.0 * t / a > RuntimeOption::EvalArrayIterSpecializationRate;
  return specialize ? result : generic;
}

//////////////////////////////////////////////////////////////////////
// Accessors for different base types.

// This struct does the iter-type-specific parts of specialized iter code-gen
// so that in the emitSpecialized* functions below, we can simply describe the
// high-level structure of the code.
struct Accessor {
  Type arr_type;
  Type pos_type;
  IterSpecialization iter_type;

  virtual ~Accessor() {}

  // Branches to exit if the base doesn't match the iter's specialized type.
  virtual SSATmp* checkBase(IRGS& env, SSATmp* base, Block* exit) const = 0;

  // Given a type-checked SSATmp for the base, this method returns its size.
  virtual SSATmp* getSize(IRGS& env, SSATmp* arr) const = 0;

  // Given a base and a logical iter index, this method returns the value that
  // we should use as the iter's pos (e.g. a pointer, for pointer iters).
  //
  // This method assumes that we've already constrained arr to DataTypeSpecific
  // and that the type of arr overlaps with arr_type.
  virtual SSATmp* getPos(IRGS& env, SSATmp* arr, SSATmp* idx) const = 0;

  // Given a pos and a constant offset, this method returns an updated pos.
  virtual SSATmp* advancePos(IRGS& env, SSATmp* pos, int16_t offset) const = 0;

  // Given a base and a pos value, this method returns an "elm value" that we
  // can use to share arithmetic between key and val. (For example, for dict
  // index iters, we compute a pointer that's only valid for this iteration.)
  virtual SSATmp* getElm(IRGS& env, SSATmp* arr, SSATmp* pos) const = 0;

  // Given a base and an "elm value", this method returns the key of that elm.
  virtual SSATmp* getKey(IRGS& env, SSATmp* arr, SSATmp* elm) const = 0;

  // Given a base and an "elm value", this method returns the val of that elm.
  virtual SSATmp* getVal(IRGS& env, SSATmp* arr, SSATmp* elm) const = 0;
};

struct PackedAccessor : public Accessor {
  explicit PackedAccessor(IterSpecialization specialization) {
    is_ptr_iter = specialization.base_const && !specialization.output_key;
    is_hack_arr = specialization.base_type == IterSpecialization::Vec;
    arr_type = is_hack_arr
      ? (RO::EvalAllowBespokeArrayLikes ? TVanillaVec : TVec)
      : TPackedArr;
    pos_type = is_ptr_iter ? TPtrToElemCell : TInt;
    iter_type = specialization;
  }

  SSATmp* checkBase(IRGS& env, SSATmp* base, Block* exit) const override {
    return gen(env, CheckType, exit, arr_type, base);
  }

  SSATmp* getSize(IRGS& env, SSATmp* arr) const override {
    auto const op = is_hack_arr ? CountVec : CountArrayFast;
    return gen(env, op, arr);
  }

  SSATmp* getPos(IRGS& env, SSATmp* arr, SSATmp* idx) const override {
    return is_ptr_iter ? gen(env, GetPackedPtrIter, arr, idx) : idx;
  }

  SSATmp* getElm(IRGS& env, SSATmp* arr, SSATmp* pos) const override {
    return pos;
  }

  SSATmp* getKey(IRGS& env, SSATmp* arr, SSATmp* elm) const override {
    return elm;
  }

  SSATmp* getVal(IRGS& env, SSATmp* arr, SSATmp* elm) const override {
    if (is_ptr_iter) return gen(env, LdPtrIterVal, TInt, elm);
    auto const op = is_hack_arr ? LdVecElem : LdPackedElem;
    return gen(env, op, arr, elm);
  }

  SSATmp* advancePos(IRGS& env, SSATmp* pos, int16_t offset) const override {
    return is_ptr_iter
      ? gen(env, AdvancePackedPtrIter, IterOffsetData{offset}, pos)
      : gen(env, AddInt, cns(env, offset), pos);
  }

private:
  bool is_ptr_iter = false;
  bool is_hack_arr = false;
};

struct MixedAccessor : public Accessor {
  explicit MixedAccessor(IterSpecialization specialization) {
    is_ptr_iter = specialization.base_const;
    is_hack_arr = specialization.base_type == IterSpecialization::Dict;
    arr_type = is_hack_arr
      ? (RO::EvalAllowBespokeArrayLikes ? TVanillaDict : TDict)
      : TMixedArr;
    pos_type = is_ptr_iter ? TPtrToElemCell : TInt;
    key_type = getKeyType(specialization);
    iter_type = specialization;
  }

  SSATmp* checkBase(IRGS& env, SSATmp* base, Block* exit) const override {
    auto const arr = gen(env, CheckType, exit, arr_type, base);
    gen(env, CheckMixedArrayKeys, exit, key_type, arr);
    return arr;
  }

  SSATmp* getSize(IRGS& env, SSATmp* arr) const override {
    auto const op = is_hack_arr ? CountDict : CountArrayFast;
    return gen(env, op, arr);
  }

  SSATmp* getPos(IRGS& env, SSATmp* arr, SSATmp* idx) const override {
    return is_ptr_iter ? gen(env, GetMixedPtrIter, arr, idx) : idx;
  }

  SSATmp* getElm(IRGS& env, SSATmp* arr, SSATmp* pos) const override {
    return is_ptr_iter ? pos : gen(env, GetMixedPtrIter, arr, pos);
  }

  SSATmp* getKey(IRGS& env, SSATmp* arr, SSATmp* elm) const override {
    return gen(env, LdPtrIterKey, key_type, elm);
  }

  SSATmp* getVal(IRGS& env, SSATmp* arr, SSATmp* elm) const override {
    return gen(env, LdPtrIterVal, key_type, elm);
  }

  SSATmp* advancePos(IRGS& env, SSATmp* pos, int16_t offset) const override {
    return is_ptr_iter
      ? gen(env, AdvanceMixedPtrIter, IterOffsetData{offset}, pos)
      : gen(env, AddInt, cns(env, offset), pos);
  }

private:
  bool is_ptr_iter = false;
  bool is_hack_arr = false;
  Type key_type;
};

std::unique_ptr<Accessor> getAccessor(IterSpecialization type) {
  switch (type.base_type) {
    case IterSpecialization::Packed:
    case IterSpecialization::Vec: {
      return std::make_unique<PackedAccessor>(type);
    }
    case IterSpecialization::Mixed:
    case IterSpecialization::Dict: {
      return std::make_unique<MixedAccessor>(type);
    }
  }
  always_assert(false);
}

//////////////////////////////////////////////////////////////////////
// Specialization helpers.

// Load the iterator base, either from the iterator itself or from a local.
// This method may only be called from one of the guarded specialized blocks,
// so we can assert that the type of the base matches the iterator type.
SSATmp* iterBase(IRGS& env, const Accessor& accessor,
                 const IterArgs& data, uint32_t baseLocalId) {
  assertx(!curFunc(env)->isPseudoMain());
  auto const type = accessor.arr_type;
  auto const local = baseLocalId != kInvalidId;
  if (!local) return gen(env, LdIterBase, type, IterId(data.iterId), fp(env));
  gen(env, AssertLoc, type, LocalId(baseLocalId), fp(env));
  return gen(env, LdLoc, type, LocalId(baseLocalId), fp(env));
}

// When ifThen creates new blocks, it assigns them a profCount of curProfCount.
// curProfCount is based on the bytecode we're generating code for: e.g. a
// particular IterInit or IterNext in an iter group.
//
// However, during code-gen for IterInit, we may also create the header, and
// during code-gen for IterNext, we may also create the footer. These blocks
// are shared and so have higher weight than curProfCount. We initialize their
// count correctly when we create the header and footer entry Block*, so we
// just have to propagate that incoming count forward when we do an ifThen.
template<class Branch, class Taken>
void iterIfThen(IRGS& env, Branch branch, Taken taken) {
  auto const count = env.irb->curBlock()->profCount();
  ifThen(env, branch, [&]{
    hint(env, Block::Hint::Unlikely);
    env.irb->curBlock()->setProfCount(count);
    taken();
  });
  env.irb->curBlock()->setProfCount(count);
}

// Clear the value in an iterator's output local. Since we never specialize
// iterators in pseudo-mains, we can use LdLoc here without a problem.
void iterClear(IRGS& env, uint32_t local) {
  assertx(local != kInvalidId);
  assertx(!curFunc(env)->isPseudoMain());
  env.irb->constrainLocal(local, DataTypeCountness, "iterClear");
  decRef(env, gen(env, LdLoc, TCell, LocalId(local), fp(env)), local);
}

// Iterator output locals should always be cells, so we can store without
// needing to create an exit here. We check the cell invariant in debug mode.
void iterStore(IRGS& env, uint32_t local, SSATmp* cell) {
  assertx(cell->type() <= TCell);
  assertx(local != kInvalidId);
  assertx(!curFunc(env)->isPseudoMain());
  gen(env, StLoc, LocalId(local), fp(env), cell);
}

// It's important that we dec-ref the old values before the surprise check;
// refcount-opts may fail to pair incs and decs separated by this check.
//
// However, if we do that, then when we interpret IterNext in surprise cases,
// we'll dec-ref the values again. We solve this issue by storing nulls to the
// output locals here, making the dec-refs in the surprise cases a no-op.
//
// Likewise, we really want the old position to be dead at this point so that
// register allocation can update the position in place. However, if we don't
// do anything here, store-elim is liable to push StIterPos into the exit path
// and vasm-copy is liable to realize that it can re-use the old position tmp
// instead of recomputing it. We want to increment the position register in
// place in the main flow, so we recompute the tmp here instead.
void iterSurpriseCheck(IRGS& env, const Accessor& accessor,
                       const IterArgs& data, SSATmp* pos) {
  iterIfThen(env,
    [&](Block* taken) {
      auto const ptr = resumeMode(env) != ResumeMode::None ? sp(env) : fp(env);
      gen(env, CheckSurpriseFlags, taken, ptr);
    },
    [&]{
      iterStore(env, data.valId, cns(env, TInitNull));
      if (data.hasKey()) iterStore(env, data.keyId, cns(env, TInitNull));
      auto const old = accessor.advancePos(env, pos, -1);
      gen(env, StIterPos, IterId(data.iterId), fp(env), old);
      gen(env, Jmp, makeExitSlow(env));
    }
  );
}

// In profiling mode, we profile the dec-ref of the iterator output locals
// before calling the generic native helper, so that if we specialize we'll
// produce good code for them. `init` is true for *IterInit*, not *IterNext*.
void profileDecRefs(IRGS& env, const IterArgs& data, SSATmp* base,
                    const bool local, const bool init) {
  if (env.context.kind != TransKind::Profile) return;
  if (curFunc(env)->isPseudoMain()) return;

  if (!local) {
    auto const type = TArrLike | TObj;
    auto const id = IterId(data.iterId);
    always_assert(init == (base != nullptr));
    auto const arr = init ? base : gen(env, LdIterBase, type, id, fp(env));
    gen(env, ProfileDecRef, DecRefData(), arr);
  }

  auto const val = gen(env, LdLoc, TCell, LocalId(data.valId), fp(env));
  gen(env, ProfileDecRef, DecRefData(data.valId), val);
  if (!data.hasKey()) return;

  // If we haven't already guarded on the type of a string key, and it may or
  // may not be persistent, force a guard on it now. We'll lose the persistent-
  // vs-not distinction when specializing, but a DecRefProfile for a generic
  // type like TInitCell would capture this distinction.
  auto const key = [&]{
    auto const tmp = gen(env, LdLoc, TCell, LocalId(data.keyId), fp(env));
    return init ? tmp : cond(env,
      [&](Block* taken) { return gen(env, CheckType, TStr, taken, tmp); },
      [&](SSATmp* str) { return str; },
      [&] { return tmp; });
  }();
  gen(env, ProfileDecRef, DecRefData(data.keyId), key);
}

// At the start of each shared specialized block (header or footer), we must
// clear FrameState (we'll reuse the block) and phi in the new pos value.
SSATmp* phiIterPos(IRGS& env, const Accessor& accessor) {
  env.irb->fs().clearForUnprocessedPred();
  auto block = env.irb->curBlock();
  auto const label = env.unit.defLabel(1, block, env.irb->nextBCContext());
  auto const pos = label->dst(0);
  pos->setType(accessor.pos_type);
  return pos;
}

//////////////////////////////////////////////////////////////////////
// Specialization implementations: init, header, next, and footer.

void emitSpecializedInit(IRGS& env, const Accessor& accessor,
                         const IterArgs& data, bool local, Block* header,
                         Block* done, SSATmp* base) {
  auto const arr = accessor.checkBase(env, base, makeExitSlow(env));
  auto const size = accessor.getSize(env, arr);
  if (!local) discard(env, 1);

  ifThen(env,
    [&](Block* taken) { gen(env, JmpZero, taken, size); },
    [&]{
      if (!local) decRef(env, arr);
      gen(env, Jmp, done);
    }
  );

  iterClear(env, data.valId);
  if (data.hasKey()) iterClear(env, data.keyId);

  auto const id = IterId(data.iterId);
  auto const ty = IterTypeData(data.iterId, accessor.iter_type);
  gen(env, StIterBase, id, fp(env), local ? cns(env, nullptr) : arr);
  gen(env, StIterType, ty, fp(env));
  gen(env, StIterEnd, id, fp(env), accessor.getPos(env, arr, size));
  gen(env, Jmp, header, accessor.getPos(env, arr, cns(env, 0)));
}

void emitSpecializedHeader(IRGS& env, const Accessor& accessor,
                           const IterArgs& data, const Type& value_type,
                           Block* body, uint32_t baseLocalId) {
  auto const pos = phiIterPos(env, accessor);
  auto const arr = accessor.pos_type <= TInt
    ? iterBase(env, accessor, data, baseLocalId)
    : nullptr;

  auto const finish = [&](SSATmp* elm, SSATmp* val) {
    auto const keyed = data.hasKey();
    auto const key = keyed ? accessor.getKey(env, arr, elm) : nullptr;
    iterStore(env, data.valId, val);
    if (keyed) iterStore(env, data.keyId, key);
    gen(env, StIterPos, IterId(data.iterId), fp(env), pos);
    gen(env, IncRef, val);
    if (keyed) gen(env, IncRef, key);
  };

  auto guarded_val = (SSATmp*)nullptr;
  auto const elm = accessor.getElm(env, arr, pos);
  auto const val = accessor.getVal(env, arr, elm);
  auto const guardable = relaxToGuardable(value_type);
  always_assert(guardable <= TCell);
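
  // Check the profiled value type. In the unlikely case that the check fails,
  // we finish the iteration with the unguarded value and side-exit to the
  // next bytecode; otherwise, we continue with the guarded value and jump
  // into the loop body.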
  iterIfThen(env,
    [&](Block* taken) {
      guarded_val = gen(env, CheckType, guardable, taken, val);
    },
    [&]{
      finish(elm, val);
      gen(env, Jmp, makeExit(env, nextBcOff(env)));
    }
  );
  finish(elm, guarded_val);
  gen(env, Jmp, body);
}

void emitSpecializedNext(IRGS& env, const Accessor& accessor,
                         const IterArgs& data, Block* footer,
                         uint32_t baseLocalId) {
  auto const exit = makeExitSlow(env);
  auto const type = IterTypeData(data.iterId, accessor.iter_type);
  gen(env, CheckIter, exit, type, fp(env));

  auto const id = IterId(data.iterId);
  auto const old = gen(env, LdIterPos, accessor.pos_type, id, fp(env));
  auto const end = gen(env, LdIterEnd, accessor.pos_type, id, fp(env));
  auto const pos = accessor.advancePos(env, old, 1);

  ifThen(env,
    [&](Block* taken) {
      auto const eq = accessor.pos_type <= TInt ? EqInt : EqPtrIter;
      auto const done = gen(env, eq, pos, end);
      gen(env, JmpNZero, taken, done);
      gen(env, Jmp, footer, pos);
    },
    [&]{
      auto const next = getBlock(env, nextBcOff(env));
      env.irb->curBlock()->setProfCount(next->profCount());

      auto const local = baseLocalId != kInvalidId;
      if (local) {
        gen(env, KillIter, id, fp(env));
      } else {
        // Load and dec-ref the base for non-local iters. We don't want to do
        // this load in the loop, because it's dead there for pointer iters.
        auto const base = iterBase(env, accessor, data, baseLocalId);
        gen(env, KillIter, id, fp(env));
        decRef(env, base);
      }
    }
  );
}

void emitSpecializedFooter(IRGS& env, const Accessor& accessor,
                           const IterArgs& data, Block* header) {
  auto const pos = phiIterPos(env, accessor);
  iterClear(env, data.valId);
  if (data.hasKey()) iterClear(env, data.keyId);
  iterSurpriseCheck(env, accessor, data, pos);
  gen(env, Jmp, header, pos);
}

//////////////////////////////////////////////////////////////////////

} // namespace

//////////////////////////////////////////////////////////////////////
// The public API for iterator specialization.

// Speculatively generate specialized code for this IterInit.
void specializeIterInit(IRGS& env, Offset doneOffset,
                        const IterArgs& data, uint32_t baseLocalId) {
  auto const gc = DataTypeSpecific;
  auto const local = baseLocalId != kInvalidId;
  auto const base = local ? ldLoc(env, baseLocalId, nullptr, gc) : topC(env);
  profileDecRefs(env, data, base, local, /*init=*/true);

  // `body` and `done` are at a different stack depth for non-local IterInits.
  if (!local) env.irb->fs().decBCSPDepth();
  auto const body = getBlock(env, nextBcOff(env));
  auto const done = getBlock(env, bcOff(env) + doneOffset);
  if (!local) env.irb->fs().incBCSPDepth();
  auto const iter = env.iters.contains(body) ? env.iters[body].get() : nullptr;

  // We mark this iter group as being despecialized if we fail to specialize.
  auto const despecialize = [&]{
    if (iter == nullptr) {
      auto const def = SpecializedIterator{IterSpecialization::generic()};
      env.iters[body] = std::make_unique<SpecializedIterator>(def);
    } else {
      iter->iter_type = IterSpecialization::generic();
    }
    assertx(!env.iters[body]->iter_type.specialized);
    logArrayIterProfile(env, data, folly::none);
  };
  if (curFunc(env)->isPseudoMain()) return despecialize();

  // We don't need to specialize on key type for value-only iterators.
  // However, we still need to call accessor.check to rule out tombstones.
  auto result = getProfileResult(env, base);
  auto& iter_type = result.top_specialization;
  iter_type.base_const = !local || (data.flags & IterArgs::Flags::BaseConst);
  iter_type.output_key = data.hasKey();

  // Check that the profiled type is specialized, that we have key type info
  // (for key-value iters), and that the type agrees with any other IterInit.
  auto const array_key = IterSpecialization::ArrayKey;
  if (!iter_type.specialized ||
      (iter_type.output_key && iter_type.key_types == array_key) ||
      (iter != nullptr && iter->iter_type.as_byte != iter_type.as_byte)) {
    return despecialize();
  }
  auto const accessor = getAccessor(iter_type);
  if (!base->type().maybe(accessor->arr_type)) return despecialize();

  // Don't specialize mixed-layout arrays with key types we can't check.
  // TODO: Remove this code when we track static strings in MixedArrayKeys.
  auto const mixed = iter_type.base_type == IterSpecialization::Mixed ||
                     iter_type.base_type == IterSpecialization::Dict;
  if (mixed && !MixedArrayKeys::getMask(getKeyType(iter_type))) {
    return despecialize();
  }

  // We're committing to the specialization. Hide the specialized code behind
  // a placeholder so that we won't use it unless we also specialize the next.
  auto const main = env.unit.defBlock(env.irb->curBlock()->profCount());
  auto const init = env.unit.defBlock(env.irb->curBlock()->profCount());
  gen(env, JmpPlaceholder, init);
  auto const inst = &env.irb->curBlock()->back();
  always_assert(inst->is(JmpPlaceholder));
  gen(env, Jmp, main);

  env.irb->appendBlock(init);
  auto const header = iter == nullptr ? env.unit.defBlock(body->profCount())
                                      : iter->header;
  emitSpecializedInit(env, *accessor, data, local, header, done, base);

  if (iter != nullptr) {
    iter->placeholders.push_back(inst);
  } else {
    env.irb->appendBlock(header);
    auto const& value_type = result.value_type;
    emitSpecializedHeader(env, *accessor, data, value_type, body, baseLocalId);
    auto const def = SpecializedIterator{iter_type, {inst}, header, nullptr};
    env.iters[body] = std::make_unique<SpecializedIterator>(def);
  }

  logArrayIterProfile(env, data, iter_type);
  env.irb->appendBlock(main);
}

// `baseLocalId` is only valid for local iters. Returns true on specialization.
bool specializeIterNext(IRGS& env, Offset loopOffset,
                        const IterArgs& data, uint32_t baseLocalId) {
  auto const local = baseLocalId != kInvalidId;
  profileDecRefs(env, data, nullptr, local, /*init=*/false);
  if (curFunc(env)->isPseudoMain()) return false;

  auto const body = getBlock(env, bcOff(env) + loopOffset);
  if (!env.iters.contains(body)) return false;

  auto const iter = env.iters[body].get();
  if (!iter->iter_type.specialized) return false;
  auto const accessor = getAccessor(iter->iter_type);
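  // If the local base's known type can't possibly match the specialized array
  // type, the specialized code would always side-exit, so bail out and leave
  // this IterNext generic.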
  if (baseLocalId != kInvalidId) {
    auto const type = env.irb->fs().local(baseLocalId).type;
    if (!type.maybe(accessor->arr_type)) return false;
  }

  // We're committing to specialization for this loop. Replace the placeholders
  // for the inits with unconditional jumps into the specialized code.
  for (auto inst : iter->placeholders) {
    assertx(inst->is(JmpPlaceholder));
    env.unit.replace(inst, Jmp, inst->taken());
  }
  iter->placeholders.clear();

  assertx(iter->header != nullptr);
  auto const footer = iter->footer == nullptr
    ? env.unit.defBlock(env.irb->curBlock()->profCount())
    : iter->footer;
  emitSpecializedNext(env, *accessor, data, footer, baseLocalId);
  if (iter->footer == nullptr) {
    BlockPusher pushBlock{*env.irb, env.irb->curMarker(), footer};
    emitSpecializedFooter(env, *accessor, data, iter->header);
    iter->footer = footer;
  }

  return true;
}

//////////////////////////////////////////////////////////////////////

}}}